📄 hugetlb.c
        avoidcopy = (page_count(old_page) == 1);
        if (avoidcopy) {
                set_huge_ptep_writable(vma, address, ptep);
                return 0;
        }

        /*
         * If the process that created a MAP_PRIVATE mapping is about to
         * perform a COW due to a shared page count, attempt to satisfy
         * the allocation without using the existing reserves. The pagecache
         * page is used to determine if the reserve at this address was
         * consumed or not. If reserves were used, a partial faulted mapping
         * at the time of fork() could consume its reserves on COW instead
         * of the full address range.
         */
        if (!(vma->vm_flags & VM_SHARED) &&
                        is_vma_resv_set(vma, HPAGE_RESV_OWNER) &&
                        old_page != pagecache_page)
                outside_reserve = 1;

        page_cache_get(old_page);
        new_page = alloc_huge_page(vma, address, outside_reserve);

        if (IS_ERR(new_page)) {
                page_cache_release(old_page);

                /*
                 * If a process owning a MAP_PRIVATE mapping fails to COW,
                 * it is due to references held by a child and an insufficient
                 * huge page pool. To guarantee the original mapper's
                 * reliability, unmap the page from child processes. The child
                 * may get SIGKILLed if it later faults.
                 */
                if (outside_reserve) {
                        BUG_ON(huge_pte_none(pte));
                        if (unmap_ref_private(mm, vma, old_page, address)) {
                                BUG_ON(page_count(old_page) != 1);
                                BUG_ON(huge_pte_none(pte));
                                goto retry_avoidcopy;
                        }
                        WARN_ON_ONCE(1);
                }

                return -PTR_ERR(new_page);
        }

        spin_unlock(&mm->page_table_lock);
        copy_huge_page(new_page, old_page, address, vma);
        __SetPageUptodate(new_page);
        spin_lock(&mm->page_table_lock);

        ptep = huge_pte_offset(mm, address & huge_page_mask(h));
        if (likely(pte_same(huge_ptep_get(ptep), pte))) {
                /* Break COW */
                huge_ptep_clear_flush(vma, address, ptep);
                set_huge_pte_at(mm, address, ptep,
                                make_huge_pte(vma, new_page, 1));
                /* Make the old page be freed below */
                new_page = old_page;
        }
        page_cache_release(new_page);
        page_cache_release(old_page);
        return 0;
}

/* Return the pagecache page at a given address within a VMA */
static struct page *hugetlbfs_pagecache_page(struct hstate *h,
                        struct vm_area_struct *vma, unsigned long address)
{
        struct address_space *mapping;
        pgoff_t idx;

        mapping = vma->vm_file->f_mapping;
        idx = vma_hugecache_offset(h, vma, address);

        return find_lock_page(mapping, idx);
}

static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
                        unsigned long address, pte_t *ptep, int write_access)
{
        struct hstate *h = hstate_vma(vma);
        int ret = VM_FAULT_SIGBUS;
        pgoff_t idx;
        unsigned long size;
        struct page *page;
        struct address_space *mapping;
        pte_t new_pte;

        /*
         * Currently, we are forced to kill the process in the event the
         * original mapper has unmapped pages from the child due to a failed
         * COW. Warn that such a situation has occurred as it may not be
         * obvious.
         */
        if (is_vma_resv_set(vma, HPAGE_RESV_UNMAPPED)) {
                printk(KERN_WARNING
                        "PID %d killed due to inadequate hugepage pool\n",
                        current->pid);
                return ret;
        }

        mapping = vma->vm_file->f_mapping;
        idx = vma_hugecache_offset(h, vma, address);

        /*
         * Use page lock to guard against racing truncation
         * before we get page_table_lock.
         */
retry:
        page = find_lock_page(mapping, idx);
        if (!page) {
                size = i_size_read(mapping->host) >> huge_page_shift(h);
                if (idx >= size)
                        goto out;
                page = alloc_huge_page(vma, address, 0);
                if (IS_ERR(page)) {
                        ret = -PTR_ERR(page);
                        goto out;
                }
                clear_huge_page(page, address, huge_page_size(h));
                __SetPageUptodate(page);

                if (vma->vm_flags & VM_SHARED) {
                        int err;
                        struct inode *inode = mapping->host;

                        err = add_to_page_cache(page, mapping, idx, GFP_KERNEL);
                        if (err) {
                                put_page(page);
                                if (err == -EEXIST)
                                        goto retry;
                                goto out;
                        }

                        spin_lock(&inode->i_lock);
                        inode->i_blocks += blocks_per_huge_page(h);
                        spin_unlock(&inode->i_lock);
                } else
                        lock_page(page);
        }

        /*
         * If we are going to COW a private mapping later, we examine the
         * pending reservations for this page now. This will ensure that
         * any allocations necessary to record that reservation occur outside
         * the spinlock.
         */
        if (write_access && !(vma->vm_flags & VM_SHARED))
                if (vma_needs_reservation(h, vma, address) < 0) {
                        ret = VM_FAULT_OOM;
                        goto backout_unlocked;
                }

        spin_lock(&mm->page_table_lock);
        size = i_size_read(mapping->host) >> huge_page_shift(h);
        if (idx >= size)
                goto backout;

        ret = 0;
        if (!huge_pte_none(huge_ptep_get(ptep)))
                goto backout;

        new_pte = make_huge_pte(vma, page, ((vma->vm_flags & VM_WRITE)
                                && (vma->vm_flags & VM_SHARED)));
        set_huge_pte_at(mm, address, ptep, new_pte);

        if (write_access && !(vma->vm_flags & VM_SHARED)) {
                /* Optimization, do the COW without a second fault */
                ret = hugetlb_cow(mm, vma, address, ptep, new_pte, page);
        }

        spin_unlock(&mm->page_table_lock);
        unlock_page(page);
out:
        return ret;

backout:
        spin_unlock(&mm->page_table_lock);
backout_unlocked:
        unlock_page(page);
        put_page(page);
        goto out;
}

int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
                        unsigned long address, int write_access)
{
        pte_t *ptep;
        pte_t entry;
        int ret;
        struct page *pagecache_page = NULL;
        static DEFINE_MUTEX(hugetlb_instantiation_mutex);
        struct hstate *h = hstate_vma(vma);

        ptep = huge_pte_alloc(mm, address, huge_page_size(h));
        if (!ptep)
                return VM_FAULT_OOM;

        /*
         * Serialize hugepage allocation and instantiation, so that we don't
         * get spurious allocation failures if two CPUs race to instantiate
         * the same page in the page cache.
         */
        mutex_lock(&hugetlb_instantiation_mutex);
        entry = huge_ptep_get(ptep);
        if (huge_pte_none(entry)) {
                ret = hugetlb_no_page(mm, vma, address, ptep, write_access);
                goto out_mutex;
        }

        ret = 0;

        /*
         * If we are going to COW the mapping later, we examine the pending
         * reservations for this page now. This will ensure that any
         * allocations necessary to record that reservation occur outside the
         * spinlock. For private mappings, we also lookup the pagecache
         * page now as it is used to determine if a reservation has been
         * consumed.
         */
        if (write_access && !pte_write(entry)) {
                if (vma_needs_reservation(h, vma, address) < 0) {
                        ret = VM_FAULT_OOM;
                        goto out_mutex;
                }

                if (!(vma->vm_flags & VM_SHARED))
                        pagecache_page = hugetlbfs_pagecache_page(h,
                                                                vma, address);
        }

        spin_lock(&mm->page_table_lock);
        /* Check for a racing update before calling hugetlb_cow */
        if (unlikely(!pte_same(entry, huge_ptep_get(ptep))))
                goto out_page_table_lock;

        if (write_access) {
                if (!pte_write(entry)) {
                        ret = hugetlb_cow(mm, vma, address, ptep, entry,
                                                        pagecache_page);
                        goto out_page_table_lock;
                }
                entry = pte_mkdirty(entry);
        }
        entry = pte_mkyoung(entry);
        if (huge_ptep_set_access_flags(vma, address, ptep, entry, write_access))
                update_mmu_cache(vma, address, entry);

out_page_table_lock:
        spin_unlock(&mm->page_table_lock);

        if (pagecache_page) {
                unlock_page(pagecache_page);
                put_page(pagecache_page);
        }

out_mutex:
        mutex_unlock(&hugetlb_instantiation_mutex);

        return ret;
}

/* Can be overridden by architectures */
__attribute__((weak)) struct page *
follow_huge_pud(struct mm_struct *mm, unsigned long address,
                pud_t *pud, int write)
{
        BUG();
        return NULL;
}

static int huge_zeropage_ok(pte_t *ptep, int write, int shared)
{
        if (!ptep || write || shared)
                return 0;
        else
                return huge_pte_none(huge_ptep_get(ptep));
}

int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
                        struct page **pages, struct vm_area_struct **vmas,
                        unsigned long *position, int *length, int i,
                        int write)
{
        unsigned long pfn_offset;
        unsigned long vaddr = *position;
        int remainder = *length;
        struct hstate *h = hstate_vma(vma);
        int zeropage_ok = 0;
        int shared = vma->vm_flags & VM_SHARED;

        spin_lock(&mm->page_table_lock);
        while (vaddr < vma->vm_end && remainder) {
                pte_t *pte;
                struct page *page;

                /*
                 * Some archs (sparc64, sh*) have multiple pte_ts to
                 * each hugepage. We have to make sure we get the
                 * first, for the page indexing below to work.
                 */
                pte = huge_pte_offset(mm, vaddr & huge_page_mask(h));
                if (huge_zeropage_ok(pte, write, shared))
                        zeropage_ok = 1;

                if (!pte ||
                    (huge_pte_none(huge_ptep_get(pte)) && !zeropage_ok) ||
                    (write && !pte_write(huge_ptep_get(pte)))) {
                        int ret;

                        spin_unlock(&mm->page_table_lock);
                        ret = hugetlb_fault(mm, vma, vaddr, write);
                        spin_lock(&mm->page_table_lock);
                        if (!(ret & VM_FAULT_ERROR))
                                continue;

                        remainder = 0;
                        if (!i)
                                i = -EFAULT;
                        break;
                }

                pfn_offset = (vaddr & ~huge_page_mask(h)) >> PAGE_SHIFT;
                page = pte_page(huge_ptep_get(pte));
same_page:
                if (pages) {
                        if (zeropage_ok)
                                pages[i] = ZERO_PAGE(0);
                        else
                                pages[i] = mem_map_offset(page, pfn_offset);
                        get_page(pages[i]);
                }

                if (vmas)
                        vmas[i] = vma;

                vaddr += PAGE_SIZE;
                ++pfn_offset;
                --remainder;
                ++i;
                if (vaddr < vma->vm_end && remainder &&
                                pfn_offset < pages_per_huge_page(h)) {
                        /*
                         * We use pfn_offset to avoid touching the pageframes
                         * of this compound page.
                         */
                        goto same_page;
                }
        }
        spin_unlock(&mm->page_table_lock);
        *length = remainder;
        *position = vaddr;

        return i;
}

void hugetlb_change_protection(struct vm_area_struct *vma,
                unsigned long address, unsigned long end, pgprot_t newprot)
{
        struct mm_struct *mm = vma->vm_mm;
        unsigned long start = address;
        pte_t *ptep;
        pte_t pte;
        struct hstate *h = hstate_vma(vma);

        BUG_ON(address >= end);
        flush_cache_range(vma, address, end);

        spin_lock(&vma->vm_file->f_mapping->i_mmap_lock);
        spin_lock(&mm->page_table_lock);
        for (; address < end; address += huge_page_size(h)) {
                ptep = huge_pte_offset(mm, address);
                if (!ptep)
                        continue;
                if (huge_pmd_unshare(mm, &address, ptep))
                        continue;
                if (!huge_pte_none(huge_ptep_get(ptep))) {
                        pte = huge_ptep_get_and_clear(mm, address, ptep);
                        pte = pte_mkhuge(pte_modify(pte, newprot));
                        set_huge_pte_at(mm, address, ptep, pte);
                }
        }
        spin_unlock(&mm->page_table_lock);
        spin_unlock(&vma->vm_file->f_mapping->i_mmap_lock);

        flush_tlb_range(vma, start, end);
}

int hugetlb_reserve_pages(struct inode *inode,
                                        long from, long to,
                                        struct vm_area_struct *vma,
                                        int acctflag)
{
        long ret, chg;
        struct hstate *h = hstate_inode(inode);

        /*
         * Only apply hugepage reservation if asked. At fault time, an
         * attempt will be made for VM_NORESERVE to allocate a page
         * and filesystem quota without using reserves.
         */
        if (acctflag & VM_NORESERVE)
                return 0;

        /*
         * Shared mappings base their reservation on the number of pages that
         * are already allocated on behalf of the file. Private mappings need
         * to reserve the full area even if read-only as mprotect() may be
         * called to make the mapping read-write. Assume !vma is a shm mapping.
         */
        if (!vma || vma->vm_flags & VM_SHARED)
                chg = region_chg(&inode->i_mapping->private_list, from, to);
        else {
                struct resv_map *resv_map = resv_map_alloc();
                if (!resv_map)
                        return -ENOMEM;

                chg = to - from;

                set_vma_resv_map(vma, resv_map);
                set_vma_resv_flags(vma, HPAGE_RESV_OWNER);
        }

        if (chg < 0)
                return chg;

        /* There must be enough filesystem quota for the mapping */
        if (hugetlb_get_quota(inode->i_mapping, chg))
                return -ENOSPC;

        /*
         * Check enough hugepages are available for the reservation.
         * Hand back the quota if there are not.
         */
        ret = hugetlb_acct_memory(h, chg);
        if (ret < 0) {
                hugetlb_put_quota(inode->i_mapping, chg);
                return ret;
        }

        /*
         * Account for the reservations made. Shared mappings record regions
         * that have reservations as they are shared by multiple VMAs.
         * When the last VMA disappears, the region map says how much
         * the reservation was and the page cache tells how much of
         * the reservation was consumed. Private mappings are per-VMA and
         * only the consumed reservations are tracked. When the VMA
         * disappears, the original reservation is the VMA size and the
         * consumed reservations are stored in the map. Hence, nothing
         * else has to be done for private mappings here.
         */
        if (!vma || vma->vm_flags & VM_SHARED)
                region_add(&inode->i_mapping->private_list, from, to);
        return 0;
}

void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed)
{
        struct hstate *h = hstate_inode(inode);
        long chg = region_truncate(&inode->i_mapping->private_list, offset);

        spin_lock(&inode->i_lock);
        inode->i_blocks -= blocks_per_huge_page(h);
        spin_unlock(&inode->i_lock);

        hugetlb_put_quota(inode->i_mapping, (chg - freed));
        hugetlb_acct_memory(h, -(chg - freed));
}
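To see how these kernel paths are exercised from userspace, here is a minimal sketch. It assumes hugetlbfs is mounted at /dev/hugepages, the huge page size is 2 MB, and the pool (/proc/sys/vm/nr_hugepages) has at least two free pages; the file name "demo" and the mount point are illustrative only, not part of the source above.

/*
 * Userspace sketch (not part of hugetlb.c): drives the fault and COW paths
 * shown above through a MAP_PRIVATE hugetlbfs mapping. Paths, sizes and the
 * file name are assumptions for illustration.
 */
#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <unistd.h>

#define HUGEFILE "/dev/hugepages/demo"  /* hypothetical hugetlbfs file */
#define LENGTH   (2UL * 1024 * 1024)    /* one huge page (arch-dependent) */

int main(void)
{
        int fd = open(HUGEFILE, O_CREAT | O_RDWR, 0600);
        if (fd < 0) {
                perror("open");
                return 1;
        }

        /*
         * A MAP_PRIVATE mapping of a hugetlbfs file: at mmap() time,
         * hugetlb_reserve_pages() marks this VMA HPAGE_RESV_OWNER and
         * reserves "to - from" huge pages up front.
         */
        char *p = mmap(NULL, LENGTH, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE, fd, 0);
        if (p == MAP_FAILED) {
                perror("mmap");
                return 1;
        }

        /*
         * First touch: the huge PTE is empty, so hugetlb_fault() takes the
         * hugetlb_no_page() path, allocates a huge page and, because this is
         * a write fault on a private mapping, finishes the COW in the same
         * fault ("do the COW without a second fault").
         */
        p[0] = 1;

        if (fork() == 0) {
                /*
                 * fork() write-protects the huge PTE in both parent and
                 * child for this private mapping, so the child's write
                 * reaches hugetlb_cow(). The page_count() is now greater
                 * than one, the avoidcopy shortcut is skipped, and the data
                 * is copied into a freshly allocated huge page.
                 */
                p[0] = 2;
                _exit(0);
        }
        wait(NULL);

        munmap(p, LENGTH);
        close(fd);
        unlink(HUGEFILE);
        return 0;
}

If the child's allocation cannot be satisfied, the listing's outside_reserve / HPAGE_RESV_UNMAPPED handling applies: the original mapper keeps its page and the child may be killed on a later fault, as the warning in hugetlb_no_page() describes.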