📄 memory.c

📁 最新最稳定的Linux内存管理模块源代码
💻 C
📖 第 1 页 / 共 5 页
字号:
12 3 4 5 下一页
/*
 *  linux/mm/memory.c
 *
 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 */

/*
 * demand-loading started 01.12.91 - seems it is high on the list of
 * things wanted, and it should be easy to implement. - Linus
 */

/*
 * Ok, demand-loading was easy, shared pages a little bit trickier. Shared
 * pages started 02.12.91, seems to work. - Linus.
 *
 * Tested sharing by executing about 30 /bin/sh: under the old kernel it
 * would have taken more than the 6M I have free, but it worked well as
 * far as I could see.
 *
 * Also corrected some "invalidate()"s - I wasn't doing enough of them.
 */

/*
 * Real VM (paging to/from disk) started 18.12.91. Much more work and
 * thought has to go into this. Oh, well..
 * 19.12.91  -  works, somewhat. Sometimes I get faults, don't know why.
 *		Found it. Everything seems to work now.
 * 20.12.91  -  Ok, making the swap-device changeable like the root.
 */

/*
 * 05.04.94  -  Multi-page memory management added for v1.1.
 * 		Idea by Alex Bligh (alex@cconcepts.co.uk)
 *
 * 16.07.99  -  Support of BIGMEM added by Gerhard Wichert, Siemens AG
 *		(Gerhard.Wichert@pdb.siemens.de)
 *
 * Aug/Sep 2004 Changed to four level page tables (Andi Kleen)
 */

#include <linux/kernel_stat.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/mman.h>
#include <linux/swap.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/rmap.h>
#include <linux/module.h>
#include <linux/delayacct.h>
#include <linux/init.h>
#include <linux/writeback.h>
#include <linux/memcontrol.h>
#include <linux/mmu_notifier.h>
#include <linux/kallsyms.h>
#include <linux/swapops.h>
#include <linux/elf.h>

#include <asm/pgalloc.h>
#include <asm/uaccess.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/pgtable.h>

#include "internal.h"

#ifndef CONFIG_NEED_MULTIPLE_NODES
/* use the per-pgdat data instead for discontigmem - mbligh */
unsigned long max_mapnr;
struct page *mem_map;

EXPORT_SYMBOL(max_mapnr);
EXPORT_SYMBOL(mem_map);
#endif

unsigned long num_physpages;
/*
 * A number of key systems in x86 including ioremap() rely on the assumption
 * that high_memory defines the upper bound on direct map memory, the end
 * of ZONE_NORMAL.  Under CONFIG_DISCONTIG this means that max_low_pfn and
 * highstart_pfn must be the same; there must be no gap between ZONE_NORMAL
 * and ZONE_HIGHMEM.
 */
void *high_memory;

EXPORT_SYMBOL(num_physpages);
EXPORT_SYMBOL(high_memory);

/*
 * Randomize the address space (stacks, mmaps, brk, etc.).
 *
 * ( When CONFIG_COMPAT_BRK=y we exclude brk from randomization,
 *   as ancient (libc5 based) binaries can segfault. )
 *
 * Default is 1 with CONFIG_COMPAT_BRK and 2 without it; disable_randmaps()
 * below sets it to 0.
 */
int randomize_va_space __read_mostly =
#ifdef CONFIG_COMPAT_BRK
					1;
#else
					2;
#endif

/*
 * Handler registered under the "norandmaps" string via __setup():
 * turns address space randomization off by zeroing randomize_va_space.
 * The argument string is ignored.  Always returns 1.
 */
static int __init disable_randmaps(char *s)
{
	randomize_va_space = 0;
	return 1;
}
__setup("norandmaps", disable_randmaps);

/*
 * If a p?d_bad entry is found while walking page tables, report
 * the error, before resetting entry to p?d_none.  Usually (but
 * very seldom) called out from the p?d_none_or_clear_bad macros.
 */

/* Report the bad pgd entry with pgd_ERROR(), then clear it. */
void pgd_clear_bad(pgd_t *pgd)
{
	pgd_ERROR(*pgd);
	pgd_clear(pgd);
}

/* Report the bad pud entry with pud_ERROR(), then clear it. */
void pud_clear_bad(pud_t *pud)
{
	pud_ERROR(*pud);
	pud_clear(pud);
}

/* Report the bad pmd entry with pmd_ERROR(), then clear it. */
void pmd_clear_bad(pmd_t *pmd)
{
	pmd_ERROR(*pmd);
	pmd_clear(pmd);
}

/*
 * Note: this doesn't free the actual pages themselves. That
 * has been handled earlier when unmapping all the memory regions.
*/static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd){	pgtable_t token = pmd_pgtable(*pmd);	pmd_clear(pmd);	pte_free_tlb(tlb, token);	tlb->mm->nr_ptes--;}static inline void free_pmd_range(struct mmu_gather *tlb, pud_t *pud,				unsigned long addr, unsigned long end,				unsigned long floor, unsigned long ceiling){	pmd_t *pmd;	unsigned long next;	unsigned long start;	start = addr;	pmd = pmd_offset(pud, addr);	do {		next = pmd_addr_end(addr, end);		if (pmd_none_or_clear_bad(pmd))			continue;		free_pte_range(tlb, pmd);	} while (pmd++, addr = next, addr != end);	start &= PUD_MASK;	if (start < floor)		return;	if (ceiling) {		ceiling &= PUD_MASK;		if (!ceiling)			return;	}	if (end - 1 > ceiling - 1)		return;	pmd = pmd_offset(pud, start);	pud_clear(pud);	pmd_free_tlb(tlb, pmd);}static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,				unsigned long addr, unsigned long end,				unsigned long floor, unsigned long ceiling){	pud_t *pud;	unsigned long next;	unsigned long start;	start = addr;	pud = pud_offset(pgd, addr);	do {		next = pud_addr_end(addr, end);		if (pud_none_or_clear_bad(pud))			continue;		free_pmd_range(tlb, pud, addr, next, floor, ceiling);	} while (pud++, addr = next, addr != end);	start &= PGDIR_MASK;	if (start < floor)		return;	if (ceiling) {		ceiling &= PGDIR_MASK;		if (!ceiling)			return;	}	if (end - 1 > ceiling - 1)		return;	pud = pud_offset(pgd, start);	pgd_clear(pgd);	pud_free_tlb(tlb, pud);}/* * This function frees user-level page tables of a process. * * Must be called with pagetable lock held. */void free_pgd_range(struct mmu_gather *tlb,			unsigned long addr, unsigned long end,			unsigned long floor, unsigned long ceiling){	pgd_t *pgd;	unsigned long next;	unsigned long start;	/*	 * The next few lines have given us lots of grief...	 *	 * Why are we testing PMD* at this top level?  Because often	 * there will be no work to do at all, and we'd prefer not to	 * go all the way down to the bottom just to discover that.	 
*	 * Why all these "- 1"s?  Because 0 represents both the bottom	 * of the address space and the top of it (using -1 for the	 * top wouldn't help much: the masks would do the wrong thing).	 * The rule is that addr 0 and floor 0 refer to the bottom of	 * the address space, but end 0 and ceiling 0 refer to the top	 * Comparisons need to use "end - 1" and "ceiling - 1" (though	 * that end 0 case should be mythical).	 *	 * Wherever addr is brought up or ceiling brought down, we must	 * be careful to reject "the opposite 0" before it confuses the	 * subsequent tests.  But what about where end is brought down	 * by PMD_SIZE below? no, end can't go down to 0 there.	 *	 * Whereas we round start (addr) and ceiling down, by different	 * masks at different levels, in order to test whether a table	 * now has no other vmas using it, so can be freed, we don't	 * bother to round floor or end up - the tests don't need that.	 */	addr &= PMD_MASK;	if (addr < floor) {		addr += PMD_SIZE;		if (!addr)			return;	}	if (ceiling) {		ceiling &= PMD_MASK;		if (!ceiling)			return;	}	if (end - 1 > ceiling - 1)		end -= PMD_SIZE;	if (addr > end - 1)		return;	start = addr;	pgd = pgd_offset(tlb->mm, addr);	do {		next = pgd_addr_end(addr, end);		if (pgd_none_or_clear_bad(pgd))			continue;		free_pud_range(tlb, pgd, addr, next, floor, ceiling);	} while (pgd++, addr = next, addr != end);}void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,		unsigned long floor, unsigned long ceiling){	while (vma) {		struct vm_area_struct *next = vma->vm_next;		unsigned long addr = vma->vm_start;		/*		 * Hide vma from rmap and vmtruncate before freeing pgtables		 */		anon_vma_unlink(vma);		unlink_file_vma(vma);		if (is_vm_hugetlb_page(vma)) {			hugetlb_free_pgd_range(tlb, addr, vma->vm_end,				floor, next? 
next->vm_start: ceiling);		} else {			/*			 * Optimization: gather nearby vmas into one call down			 */			while (next && next->vm_start <= vma->vm_end + PMD_SIZE			       && !is_vm_hugetlb_page(next)) {				vma = next;				next = vma->vm_next;				anon_vma_unlink(vma);				unlink_file_vma(vma);			}			free_pgd_range(tlb, addr, vma->vm_end,				floor, next? next->vm_start: ceiling);		}		vma = next;	}}int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address){	pgtable_t new = pte_alloc_one(mm, address);	if (!new)		return -ENOMEM;	/*	 * Ensure all pte setup (eg. pte page lock and page clearing) are	 * visible before the pte is made visible to other CPUs by being	 * put into page tables.	 *	 * The other side of the story is the pointer chasing in the page	 * table walking code (when walking the page table without locking;	 * ie. most of the time). Fortunately, these data accesses consist	 * of a chain of data-dependent loads, meaning most CPUs (alpha	 * being the notable exception) will already guarantee loads are	 * seen in-order. See the alpha page table accessors for the	 * smp_read_barrier_depends() barriers in page table walking code.	 */	smp_wmb(); /* Could be smp_wmb__xxx(before|after)_spin_lock */	spin_lock(&mm->page_table_lock);	if (!pmd_present(*pmd)) {	/* Has another populated it ? */		mm->nr_ptes++;		pmd_populate(mm, pmd, new);		new = NULL;	}	spin_unlock(&mm->page_table_lock);	if (new)		pte_free(mm, new);	return 0;}int __pte_alloc_kernel(pmd_t *pmd, unsigned long address){	pte_t *new = pte_alloc_one_kernel(&init_mm, address);	if (!new)		return -ENOMEM;	smp_wmb(); /* See comment in __pte_alloc */	spin_lock(&init_mm.page_table_lock);	if (!pmd_present(*pmd)) {	/* Has another populated it ? 
*/		pmd_populate_kernel(&init_mm, pmd, new);		new = NULL;	}	spin_unlock(&init_mm.page_table_lock);	if (new)		pte_free_kernel(&init_mm, new);	return 0;}static inline void add_mm_rss(struct mm_struct *mm, int file_rss, int anon_rss){	if (file_rss)		add_mm_counter(mm, file_rss, file_rss);	if (anon_rss)		add_mm_counter(mm, anon_rss, anon_rss);}/* * This function is called to print an error when a bad pte * is found. For example, we might have a PFN-mapped pte in * a region that doesn't allow it. * * The calling function must still handle the error. */static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr,			  pte_t pte, struct page *page){	pgd_t *pgd = pgd_offset(vma->vm_mm, addr);	pud_t *pud = pud_offset(pgd, addr);	pmd_t *pmd = pmd_offset(pud, addr);	struct address_space *mapping;	pgoff_t index;	static unsigned long resume;	static unsigned long nr_shown;	static unsigned long nr_unshown;	/*	 * Allow a burst of 60 reports, then keep quiet for that minute;	 * or allow a steady drip of one report per second.	 */	if (nr_shown == 60) {		if (time_before(jiffies, resume)) {			nr_unshown++;			return;		}		if (nr_unshown) {			printk(KERN_ALERT				"BUG: Bad page map: %lu messages suppressed\n",				nr_unshown);			nr_unshown = 0;		}		nr_shown = 0;	}	if (nr_shown++ == 0)		resume = jiffies + 60 * HZ;	mapping = vma->vm_file ? 
vma->vm_file->f_mapping : NULL;	index = linear_page_index(vma, addr);	printk(KERN_ALERT		"BUG: Bad page map in process %s  pte:%08llx pmd:%08llx\n",		current->comm,		(long long)pte_val(pte), (long long)pmd_val(*pmd));	if (page) {		printk(KERN_ALERT		"page:%p flags:%p count:%d mapcount:%d mapping:%p index:%lx\n",		page, (void *)page->flags, page_count(page),		page_mapcount(page), page->mapping, page->index);	}	printk(KERN_ALERT		"addr:%p vm_flags:%08lx anon_vma:%p mapping:%p index:%lx\n",		(void *)addr, vma->vm_flags, vma->anon_vma, mapping, index);	/*	 * Choose text because data symbols depend on CONFIG_KALLSYMS_ALL=y	 */	if (vma->vm_ops)		print_symbol(KERN_ALERT "vma->vm_ops->fault: %s\n",				(unsigned long)vma->vm_ops->fault);	if (vma->vm_file && vma->vm_file->f_op)		print_symbol(KERN_ALERT "vma->vm_file->f_op->mmap: %s\n",				(unsigned long)vma->vm_file->f_op->mmap);	dump_stack();	add_taint(TAINT_BAD_PAGE);}static inline int is_cow_mapping(unsigned int flags){	return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;}/* * vm_normal_page -- This function gets the "struct page" associated with a pte. * * "Special" mappings do not wish to be associated with a "struct page" (either * it doesn't exist, or it exists but they don't want to touch it). In this * case, NULL is returned here. "Normal" mappings do have a struct page. * * There are 2 broad cases. Firstly, an architecture may define a pte_special() * pte bit, in which case this function is trivial. Secondly, an architecture * may not have a spare pte bit, which requires a more complicated scheme, * described below. * * A raw VM_PFNMAP mapping (ie. one that is not COWed) is always considered a * special mapping (even if there are underlying and valid "struct pages"). * COWed pages of a VM_PFNMAP are always normal. 
* * The way we recognize COWed pages within VM_PFNMAP mappings is through the * rules set up by "remap_pfn_range()": the vma will have the VM_PFNMAP bit * set, and the vm_pgoff will point to the first PFN mapped: thus every special * mapping will always honor the rule * *	pfn_of_page == vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT) *
12 3 4 5 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -