
📄 hugetlb.c

📁 Linux kernel memory-management module source code (generic hugetlb support)
💻 C
📖 Page 1 of 5
/*
 * Generic hugetlb support.
 * (C) William Irwin, April 2004
 */
#include <linux/gfp.h>
#include <linux/list.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/seq_file.h>
#include <linux/sysctl.h>
#include <linux/highmem.h>
#include <linux/mmu_notifier.h>
#include <linux/nodemask.h>
#include <linux/pagemap.h>
#include <linux/mempolicy.h>
#include <linux/cpuset.h>
#include <linux/mutex.h>
#include <linux/bootmem.h>
#include <linux/sysfs.h>

#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/io.h>

#include <linux/hugetlb.h>
#include "internal.h"

const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL;
static gfp_t htlb_alloc_mask = GFP_HIGHUSER;
unsigned long hugepages_treat_as_movable;

static int max_hstate;
unsigned int default_hstate_idx;
struct hstate hstates[HUGE_MAX_HSTATE];

__initdata LIST_HEAD(huge_boot_pages);

/* for command line parsing */
static struct hstate * __initdata parsed_hstate;
static unsigned long __initdata default_hstate_max_huge_pages;
static unsigned long __initdata default_hstate_size;

#define for_each_hstate(h) \
	for ((h) = hstates; (h) < &hstates[max_hstate]; (h)++)

/*
 * Protects updates to hugepage_freelists, nr_huge_pages, and free_huge_pages
 */
static DEFINE_SPINLOCK(hugetlb_lock);

/*
 * Region tracking -- allows tracking of reservations and instantiated pages
 *                    across the pages in a mapping.
 *
 * The region data structures are protected by a combination of the mmap_sem
 * and the hugetlb_instantiation_mutex.  To access or modify a region the
 * caller must either hold the mmap_sem for write, or the mmap_sem for read
 * and the hugetlb_instantiation mutex:
 *
 * 	down_write(&mm->mmap_sem);
 * or
 * 	down_read(&mm->mmap_sem);
 * 	mutex_lock(&hugetlb_instantiation_mutex);
 */
struct file_region {
	struct list_head link;
	long from;
	long to;
};

static long region_add(struct list_head *head, long f, long t)
{
	struct file_region *rg, *nrg, *trg;

	/* Locate the region we are either in or before. */
	list_for_each_entry(rg, head, link)
		if (f <= rg->to)
			break;

	/* Round our left edge to the current segment if it encloses us. */
	if (f > rg->from)
		f = rg->from;

	/* Check for and consume any regions we now overlap with. */
	nrg = rg;
	list_for_each_entry_safe(rg, trg, rg->link.prev, link) {
		if (&rg->link == head)
			break;
		if (rg->from > t)
			break;

		/* If this area reaches higher then extend our area to
		 * include it completely.  If this is not the first area
		 * which we intend to reuse, free it. */
		if (rg->to > t)
			t = rg->to;
		if (rg != nrg) {
			list_del(&rg->link);
			kfree(rg);
		}
	}
	nrg->from = f;
	nrg->to = t;
	return 0;
}

static long region_chg(struct list_head *head, long f, long t)
{
	struct file_region *rg, *nrg;
	long chg = 0;

	/* Locate the region we are before or in. */
	list_for_each_entry(rg, head, link)
		if (f <= rg->to)
			break;

	/* If we are below the current region then a new region is required.
	 * Subtle, allocate a new region at the position but make it zero
	 * size such that we can guarantee to record the reservation. */
	if (&rg->link == head || t < rg->from) {
		nrg = kmalloc(sizeof(*nrg), GFP_KERNEL);
		if (!nrg)
			return -ENOMEM;
		nrg->from = f;
		nrg->to   = f;
		INIT_LIST_HEAD(&nrg->link);
		list_add(&nrg->link, rg->link.prev);

		return t - f;
	}

	/* Round our left edge to the current segment if it encloses us. */
	if (f > rg->from)
		f = rg->from;
	chg = t - f;

	/* Check for and consume any regions we now overlap with. */
	list_for_each_entry(rg, rg->link.prev, link) {
		if (&rg->link == head)
			break;
		if (rg->from > t)
			return chg;

		/* We overlap with this area, if it extends further than
		 * us then we must extend ourselves.  Account for its
		 * existing reservation. */
		if (rg->to > t) {
			chg += rg->to - t;
			t = rg->to;
		}
		chg -= rg->to - rg->from;
	}
	return chg;
}

static long region_truncate(struct list_head *head, long end)
{
	struct file_region *rg, *trg;
	long chg = 0;

	/* Locate the region we are either in or before. */
	list_for_each_entry(rg, head, link)
		if (end <= rg->to)
			break;
	if (&rg->link == head)
		return 0;

	/* If we are in the middle of a region then adjust it. */
	if (end > rg->from) {
		chg = rg->to - end;
		rg->to = end;
		rg = list_entry(rg->link.next, typeof(*rg), link);
	}

	/* Drop any remaining regions. */
	list_for_each_entry_safe(rg, trg, rg->link.prev, link) {
		if (&rg->link == head)
			break;
		chg += rg->to - rg->from;
		list_del(&rg->link);
		kfree(rg);
	}
	return chg;
}

static long region_count(struct list_head *head, long f, long t)
{
	struct file_region *rg;
	long chg = 0;

	/* Locate each segment we overlap with, and count that overlap. */
	list_for_each_entry(rg, head, link) {
		int seg_from;
		int seg_to;

		if (rg->to <= f)
			continue;
		if (rg->from >= t)
			break;

		seg_from = max(rg->from, f);
		seg_to = min(rg->to, t);

		chg += seg_to - seg_from;
	}

	return chg;
}
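The region_* helpers above treat the list as a set of half-open [from, to) intervals over huge-page indices: region_chg() reports how many pages a reservation of [f, t) would add (allocating a placeholder entry up front so that the later commit cannot fail for lack of memory), and region_add() then merges the range in. The standalone sketch below mirrors that charge-then-commit pattern on a simplified array of non-overlapping intervals; it is an illustration only, and the names demo_chg/demo_add and the array-based storage are hypothetical, not the kernel's list_head-based implementation.

/*
 * Hypothetical userspace sketch of the [from, to) interval accounting used
 * by region_chg()/region_add() above.  Simplified: a small array of
 * non-overlapping intervals instead of the kernel's struct list_head.
 */
#include <stdio.h>

struct interval { long from, to; };

static struct interval regions[64];
static int nregions;

/* Count how many page indices in [f, t) are not yet covered. */
static long demo_chg(long f, long t)
{
	long chg = t - f;
	int i;

	for (i = 0; i < nregions; i++) {
		long lo = regions[i].from > f ? regions[i].from : f;
		long hi = regions[i].to   < t ? regions[i].to   : t;
		if (hi > lo)
			chg -= hi - lo;	/* already reserved, don't charge twice */
	}
	return chg;
}

/* Merge [f, t) into the set, absorbing any intervals it touches. */
static void demo_add(long f, long t)
{
	struct interval out[64];
	int i, n = 0;

	for (i = 0; i < nregions; i++) {
		if (regions[i].to < f || regions[i].from > t) {
			out[n++] = regions[i];	/* disjoint: keep as-is */
		} else {			/* overlapping/adjacent: absorb */
			if (regions[i].from < f) f = regions[i].from;
			if (regions[i].to   > t) t = regions[i].to;
		}
	}
	out[n].from = f;
	out[n].to = t;
	n++;
	for (i = 0; i < n; i++)
		regions[i] = out[i];
	nregions = n;
}

int main(void)
{
	/* Reserve huge-page indices [0,2) and [4,6), then overlap them. */
	printf("charge [0,2): %ld\n", demo_chg(0, 2));	/* 2 */
	demo_add(0, 2);
	printf("charge [4,6): %ld\n", demo_chg(4, 6));	/* 2 */
	demo_add(4, 6);
	printf("charge [1,5): %ld\n", demo_chg(1, 5));	/* 2: only indices 2 and 3 are new */
	demo_add(1, 5);
	return 0;
}

The overlapping request [1,5) is only charged for the two indices that were not previously reserved, which is exactly the double-accounting the kernel's region map exists to avoid.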
/*
 * Convert the address within this vma to the page offset within
 * the mapping, in pagecache page units; huge pages here.
 */
static pgoff_t vma_hugecache_offset(struct hstate *h,
			struct vm_area_struct *vma, unsigned long address)
{
	return ((address - vma->vm_start) >> huge_page_shift(h)) +
			(vma->vm_pgoff >> huge_page_order(h));
}

/*
 * Return the size of the pages allocated when backing a VMA. In the majority
 * of cases this will be the same size as used by the page table entries.
 */
unsigned long vma_kernel_pagesize(struct vm_area_struct *vma)
{
	struct hstate *hstate;

	if (!is_vm_hugetlb_page(vma))
		return PAGE_SIZE;

	hstate = hstate_vma(vma);

	return 1UL << (hstate->order + PAGE_SHIFT);
}

/*
 * Return the page size being used by the MMU to back a VMA. In the majority
 * of cases, the page size used by the kernel matches the MMU size. On
 * architectures where it differs, an architecture-specific version of this
 * function is required.
 */
#ifndef vma_mmu_pagesize
unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
{
	return vma_kernel_pagesize(vma);
}
#endif
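As a concrete illustration of the vma_hugecache_offset() arithmetic above: it converts a fault address into a pagecache index measured in huge pages, adding the VMA-relative offset (shifted by huge_page_shift) to the file offset (vm_pgoff, which is kept in base pages, shifted down by huge_page_order). The snippet below redoes that computation with plain integers; the 4 KiB / 2 MiB constants and the sample values are assumptions for the example, not values taken from this file.

/*
 * Worked example of the vma_hugecache_offset() arithmetic, using assumed
 * x86_64-style constants (4 KiB base pages, 2 MiB huge pages).
 */
#include <stdio.h>

#define PAGE_SHIFT	12				/* 4 KiB base pages */
#define HPAGE_SHIFT	21				/* 2 MiB huge pages */
#define HPAGE_ORDER	(HPAGE_SHIFT - PAGE_SHIFT)	/* 9 */

int main(void)
{
	unsigned long vm_start = 0x700000000000UL;		/* hypothetical VMA start */
	unsigned long vm_pgoff = 1024;				/* file offset, in base pages */
	unsigned long address  = vm_start + (4UL << 20);	/* fault 4 MiB into the VMA */

	/* Same formula as vma_hugecache_offset(): VMA-relative huge-page
	 * index plus the file offset converted to huge-page units. */
	unsigned long idx = ((address - vm_start) >> HPAGE_SHIFT) +
			    (vm_pgoff >> HPAGE_ORDER);

	printf("pagecache index (huge pages): %lu\n", idx);	/* prints 4: 2 + 2 */
	return 0;
}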
/*
 * Flags for MAP_PRIVATE reservations.  These are stored in the bottom
 * bits of the reservation map pointer, which are always clear due to
 * alignment.
 */
#define HPAGE_RESV_OWNER    (1UL << 0)
#define HPAGE_RESV_UNMAPPED (1UL << 1)
#define HPAGE_RESV_MASK (HPAGE_RESV_OWNER | HPAGE_RESV_UNMAPPED)

/*
 * These helpers are used to track how many pages are reserved for
 * faults in a MAP_PRIVATE mapping. Only the process that called mmap()
 * is guaranteed to have their future faults succeed.
 *
 * With the exception of reset_vma_resv_huge_pages() which is called at fork(),
 * the reserve counters are updated with the hugetlb_lock held. It is safe
 * to reset the VMA at fork() time as it is not in use yet and there is no
 * chance of the global counters getting corrupted as a result of the values.
 *
 * The private mapping reservation is represented in a subtly different
 * manner to a shared mapping.  A shared mapping has a region map associated
 * with the underlying file, this region map represents the backing file
 * pages which have ever had a reservation assigned, which persists even
 * after the page is instantiated.  A private mapping has a region map
 * associated with the original mmap which is attached to all VMAs which
 * reference it, this region map represents those offsets which have consumed
 * reservation, i.e. where pages have been instantiated.
 */
static unsigned long get_vma_private_data(struct vm_area_struct *vma)
{
	return (unsigned long)vma->vm_private_data;
}

static void set_vma_private_data(struct vm_area_struct *vma,
							unsigned long value)
{
	vma->vm_private_data = (void *)value;
}

struct resv_map {
	struct kref refs;
	struct list_head regions;
};

static struct resv_map *resv_map_alloc(void)
{
	struct resv_map *resv_map = kmalloc(sizeof(*resv_map), GFP_KERNEL);
	if (!resv_map)
		return NULL;

	kref_init(&resv_map->refs);
	INIT_LIST_HEAD(&resv_map->regions);

	return resv_map;
}

static void resv_map_release(struct kref *ref)
{
	struct resv_map *resv_map = container_of(ref, struct resv_map, refs);

	/* Clear out any active regions before we release the map. */
	region_truncate(&resv_map->regions, 0);
	kfree(resv_map);
}

static struct resv_map *vma_resv_map(struct vm_area_struct *vma)
{
	VM_BUG_ON(!is_vm_hugetlb_page(vma));
	if (!(vma->vm_flags & VM_SHARED))
		return (struct resv_map *)(get_vma_private_data(vma) &
							~HPAGE_RESV_MASK);
	return NULL;
}

static void set_vma_resv_map(struct vm_area_struct *vma, struct resv_map *map)
{
	VM_BUG_ON(!is_vm_hugetlb_page(vma));
	VM_BUG_ON(vma->vm_flags & VM_SHARED);

	set_vma_private_data(vma, (get_vma_private_data(vma) &
				HPAGE_RESV_MASK) | (unsigned long)map);
}

static void set_vma_resv_flags(struct vm_area_struct *vma, unsigned long flags)
{
	VM_BUG_ON(!is_vm_hugetlb_page(vma));
	VM_BUG_ON(vma->vm_flags & VM_SHARED);

	set_vma_private_data(vma, get_vma_private_data(vma) | flags);
}

static int is_vma_resv_set(struct vm_area_struct *vma, unsigned long flag)
{
	VM_BUG_ON(!is_vm_hugetlb_page(vma));
	return (get_vma_private_data(vma) & flag) != 0;
}
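The helpers above pack the two HPAGE_RESV_* flag bits into the low bits of the resv_map pointer stored in vm_private_data, relying on kmalloc alignment to keep those bits clear; vma_resv_map() masks them off again with ~HPAGE_RESV_MASK before using the pointer. The following is a minimal userspace sketch of that pointer-tagging trick; the demo_map type and variable names are illustrative stand-ins, not kernel structures.

/*
 * Userspace sketch of the pointer tagging used for vm_private_data above:
 * flag bits live in the low bits of an aligned pointer and are masked off
 * before the pointer is dereferenced.
 */
#include <stdio.h>
#include <stdlib.h>

#define RESV_OWNER	(1UL << 0)
#define RESV_UNMAPPED	(1UL << 1)
#define RESV_MASK	(RESV_OWNER | RESV_UNMAPPED)

struct demo_map { long reserved; };	/* stands in for struct resv_map */

int main(void)
{
	/* malloc() returns suitably aligned memory, so the two low bits
	 * of the pointer are guaranteed to be zero. */
	struct demo_map *map = malloc(sizeof(*map));
	unsigned long priv;

	map->reserved = 42;

	/* Store the pointer and set a flag in its low bits. */
	priv = (unsigned long)map;
	priv |= RESV_OWNER;

	/* Retrieve the flag and the pointer independently. */
	printf("owner flag: %lu\n", priv & RESV_OWNER);
	printf("reserved:   %ld\n",
	       ((struct demo_map *)(priv & ~RESV_MASK))->reserved);

	free((void *)(priv & ~RESV_MASK));
	return 0;
}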
/* Decrement the reserved pages in the hugepage pool by one */
static void decrement_hugepage_resv_vma(struct hstate *h,
			struct vm_area_struct *vma)
{
	if (vma->vm_flags & VM_NORESERVE)
		return;

	if (vma->vm_flags & VM_SHARED) {
		/* Shared mappings always use reserves */
		h->resv_huge_pages--;
	} else if (is_vma_resv_set(vma, HPAGE_RESV_OWNER)) {
		/*
		 * Only the process that called mmap() has reserves for
		 * private mappings.
		 */
		h->resv_huge_pages--;
	}
}

/* Reset counters to 0 and clear all HPAGE_RESV_* flags */
void reset_vma_resv_huge_pages(struct vm_area_struct *vma)
{
	VM_BUG_ON(!is_vm_hugetlb_page(vma));
	if (!(vma->vm_flags & VM_SHARED))
		vma->vm_private_data = (void *)0;
}

/* Returns true if the VMA has associated reserve pages */
static int vma_has_reserves(struct vm_area_struct *vma)
{
	if (vma->vm_flags & VM_SHARED)
		return 1;
	if (is_vma_resv_set(vma, HPAGE_RESV_OWNER))
		return 1;
	return 0;
}

static void clear_gigantic_page(struct page *page,
			unsigned long addr, unsigned long sz)
{
	int i;
	struct page *p = page;

	might_sleep();
	for (i = 0; i < sz/PAGE_SIZE; i++, p = mem_map_next(p, page, i)) {
		cond_resched();
		clear_user_highpage(p, addr + i * PAGE_SIZE);
	}
}
static void clear_huge_page(struct page *page,
			unsigned long addr, unsigned long sz)
{
	int i;

	if (unlikely(sz > MAX_ORDER_NR_PAGES)) {
		clear_gigantic_page(page, addr, sz);
		return;
	}

	might_sleep();
	for (i = 0; i < sz/PAGE_SIZE; i++) {
		cond_resched();
		clear_user_highpage(page + i, addr + i * PAGE_SIZE);
	}
}

static void copy_gigantic_page(struct page *dst, struct page *src,
			   unsigned long addr, struct vm_area_struct *vma)
{
	int i;
	struct hstate *h = hstate_vma(vma);
	struct page *dst_base = dst;
	struct page *src_base = src;

	might_sleep();
	for (i = 0; i < pages_per_huge_page(h); ) {
		cond_resched();
		copy_user_highpage(dst, src, addr + i*PAGE_SIZE, vma);

		i++;
		dst = mem_map_next(dst, dst_base, i);
		src = mem_map_next(src, src_base, i);
	}
}
static void copy_huge_page(struct page *dst, struct page *src,
			   unsigned long addr, struct vm_area_struct *vma)
{
	int i;
	struct hstate *h = hstate_vma(vma);

	if (unlikely(pages_per_huge_page(h) > MAX_ORDER_NR_PAGES)) {
		copy_gigantic_page(dst, src, addr, vma);
		return;
	}

	might_sleep();
	for (i = 0; i < pages_per_huge_page(h); i++) {
		cond_resched();
		copy_user_highpage(dst + i, src + i, addr + i*PAGE_SIZE, vma);
	}
}

static void enqueue_huge_page(struct hstate *h, struct page *page)
{
	int nid = page_to_nid(page);
	list_add(&page->lru, &h->hugepage_freelists[nid]);
	h->free_huge_pages++;
	h->free_huge_pages_node[nid]++;
}

static struct page *dequeue_huge_page(struct hstate *h)
{
	int nid;
	struct page *page = NULL;

	for (nid = 0; nid < MAX_NUMNODES; ++nid) {
		if (!list_empty(&h->hugepage_freelists[nid])) {
			page = list_entry(h->hugepage_freelists[nid].next,
					  struct page, lru);
			list_del(&page->lru);
			h->free_huge_pages--;
			h->free_huge_pages_node[nid]--;
			break;
