hugetlbpage.c

来自「底层驱动开发」· C语言 代码 · 共 746 行 · 第 1/2 页

C
746
字号
/*
 * PPC64 (POWER4) Huge TLB Page Support for Kernel.
 *
 * Copyright (C) 2003 David Gibson, IBM Corporation.
 *
 * Based on the IA-32 version:
 * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com>
 */

#include <linux/init.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/smp_lock.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/sysctl.h>
#include <asm/mman.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/machdep.h>
#include <asm/cputable.h>
/* NOTE(review): <asm/tlb.h> and <linux/sysctl.h> are included twice in this
 * file (harmless thanks to include guards, but the duplicates below could be
 * dropped). */
#include <asm/tlb.h>
#include <linux/sysctl.h>

/* Number of 256MB segments below 4GB that can individually be opened for
 * hugepage use (one bit each in context.low_htlb_areas). */
#define NUM_LOW_AREAS	(0x100000000UL >> SID_SHIFT)
/* Number of HTLB_AREA_SHIFT-sized areas covering the rest of the user
 * page-table range (one bit each in context.high_htlb_areas). */
#define NUM_HIGH_AREAS	(PGTABLE_RANGE >> HTLB_AREA_SHIFT)

/* Modelled after find_linux_pte() */
/*
 * Look up the hugepage PTE for @addr without allocating page-table levels.
 *
 * Hugepage PTEs live at the PMD level here: the pmd slot itself is
 * reinterpreted as a pte_t.  Returns the (possibly empty) slot, or NULL if
 * the pgd/pud level is not populated.  The caller must pass an address
 * inside a hugepage area (BUG otherwise).
 */
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pg;
	pud_t *pu;
	pmd_t *pm;
	pte_t *pt;

	BUG_ON(! in_hugepage_area(mm->context, addr));

	/* Round down to the start of the huge page. */
	addr &= HPAGE_MASK;

	pg = pgd_offset(mm, addr);
	if (!pgd_none(*pg)) {
		pu = pud_offset(pg, addr);
		if (!pud_none(*pu)) {
			pm = pmd_offset(pu, addr);
			/* The pmd slot doubles as the hugepage PTE. */
			pt = (pte_t *)pm;
			/* A non-empty slot must hold a present huge PTE —
			 * anything else means corrupted page tables. */
			BUG_ON(!pmd_none(*pm)
			       && !(pte_present(*pt) && pte_huge(*pt)));
			return pt;
		}
	}

	return NULL;
}

/*
 * Like huge_pte_offset(), but allocate the intermediate pud/pmd levels as
 * needed.  Returns the hugepage PTE slot, or NULL on allocation failure.
 */
pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pg;
	pud_t *pu;
	pmd_t *pm;
	pte_t *pt;

	BUG_ON(! in_hugepage_area(mm->context, addr));

	addr &= HPAGE_MASK;

	pg = pgd_offset(mm, addr);
	pu = pud_alloc(mm, pg, addr);

	if (pu) {
		pm = pmd_alloc(mm, pu, addr);
		if (pm) {
			pt = (pte_t *)pm;
			BUG_ON(!pmd_none(*pm)
			       && !(pte_present(*pt) && pte_huge(*pt)));
			return pt;
		}
	}

	return NULL;
}

/* One huge page spans this many consecutive pmd slots; the PTE is
 * replicated into each of them. */
#define HUGEPTE_BATCH_SIZE	(HPAGE_SIZE / PMD_SIZE)

/*
 * Install @pte for the huge page at @addr, replicating it across all
 * HUGEPTE_BATCH_SIZE pmd slots.  If a mapping is already present it is
 * cleared and the pending TLB batch flushed first, so stale translations
 * cannot survive.  _PAGE_HPTEFLAGS is masked out because the new PTE has
 * no hash-table entry yet.
 */
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, pte_t pte)
{
	int i;

	if (pte_present(*ptep)) {
		pte_clear(mm, addr, ptep);
		flush_tlb_pending();
	}

	for (i = 0; i < HUGEPTE_BATCH_SIZE; i++) {
		*ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
		ptep++;
	}
}

/*
 * Atomically clear the hugepage PTE at @addr and return the old value.
 * The first slot is cleared with pte_update() (atomic read-modify-write);
 * if it had a hash-table entry, hpte_update() queues its invalidation.
 * The remaining replicated slots are then zeroed non-atomically — only the
 * first slot carries the authoritative state.
 */
pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
			      pte_t *ptep)
{
	unsigned long old = pte_update(ptep, ~0UL);
	int i;

	if (old & _PAGE_HASHPTE)
		hpte_update(mm, addr, old, 0);

	for (i = 1; i < HUGEPTE_BATCH_SIZE; i++)
		ptep[i] = __pte(0);

	return __pte(old);
}

/*
 * This function checks for proper alignment of input addr and len parameters.
 * Both must be hugepage-aligned and the range must fall entirely within
 * either the low or the high hugepage region; -EINVAL otherwise.
 */
int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
{
	if (len & ~HPAGE_MASK)
		return -EINVAL;
	if (addr & ~HPAGE_MASK)
		return -EINVAL;
	if (! (within_hugepage_low_range(addr, len)
	       || within_hugepage_high_range(addr, len)) )
		return -EINVAL;
	return 0;
}

/*
 * IPI handler: invalidate the SLB entries for every low (below-4GB) segment
 * whose bit is set in @parm, on the current CPU.  The isync before/after
 * fence the slbie sequence per the architecture's context-synchronisation
 * requirements.  SLBIE_C selects the user (class) entries.
 */
static void flush_low_segments(void *parm)
{
	u16 areas = (unsigned long) parm;
	unsigned long i;

	asm volatile("isync" : : : "memory");

	/* The u16 bitmap must exactly cover all low areas. */
	BUILD_BUG_ON((sizeof(areas)*8) != NUM_LOW_AREAS);

	for (i = 0; i < NUM_LOW_AREAS; i++) {
		if (! (areas & (1U << i)))
			continue;
		asm volatile("slbie %0"
			     : : "r" ((i << SID_SHIFT) | SLBIE_C));
	}

	asm volatile("isync" : : : "memory");
}

/*
 * IPI handler: like flush_low_segments(), but each high area spans several
 * 256MB segments (HTLB_AREA_SHIFT > SID_SHIFT), so every segment inside a
 * selected area gets its own slbie.
 */
static void flush_high_segments(void *parm)
{
	u16 areas = (unsigned long) parm;
	unsigned long i, j;

	asm volatile("isync" : : : "memory");

	BUILD_BUG_ON((sizeof(areas)*8) != NUM_HIGH_AREAS);

	for (i = 0; i < NUM_HIGH_AREAS; i++) {
		if (! (areas & (1U << i)))
			continue;
		for (j = 0; j < (1UL << (HTLB_AREA_SHIFT-SID_SHIFT)); j++)
			asm volatile("slbie %0"
				     :: "r" (((i << HTLB_AREA_SHIFT)
					     + (j << SID_SHIFT)) | SLBIE_C));
	}

	asm volatile("isync" : : : "memory");
}

/*
 * Check that low segment @area can be converted to hugepage use:
 * returns -EBUSY if any normal VMA overlaps the segment, 0 otherwise.
 * Caller must hold mmap_sem to keep the VMA list stable.
 */
static int prepare_low_area_for_htlb(struct mm_struct *mm, unsigned long area)
{
	unsigned long start = area << SID_SHIFT;
	unsigned long end = (area+1) << SID_SHIFT;
	struct vm_area_struct *vma;

	BUG_ON(area >= NUM_LOW_AREAS);

	/* Check no VMAs are in the region */
	vma = find_vma(mm, start);
	if (vma && (vma->vm_start < end))
		return -EBUSY;

	return 0;
}

/*
 * High-area counterpart of prepare_low_area_for_htlb(): -EBUSY if any VMA
 * overlaps high area @area, 0 otherwise.
 */
static int prepare_high_area_for_htlb(struct mm_struct *mm, unsigned long area)
{
	unsigned long start = area << HTLB_AREA_SHIFT;
	unsigned long end = (area+1) << HTLB_AREA_SHIFT;
	struct vm_area_struct *vma;

	BUG_ON(area >= NUM_HIGH_AREAS);

	/* Check no VMAs are in the region */
	vma = find_vma(mm, start);
	if (vma && (vma->vm_start < end))
		return -EBUSY;

	return 0;
}

/*
 * Open the low segments in @newareas for hugepage use in @mm: verify each
 * requested segment is free of VMAs, record them in the context, propagate
 * the context to this CPU's paca, then flush the affected SLB entries on
 * every CPU so no stale normal-page segment translations remain.
 * Returns 0 on success, -EBUSY if a segment is occupied.
 */
static int open_low_hpage_areas(struct mm_struct *mm, u16 newareas)
{
	unsigned long i;

	BUILD_BUG_ON((sizeof(newareas)*8) != NUM_LOW_AREAS);
	BUILD_BUG_ON((sizeof(mm->context.low_htlb_areas)*8) != NUM_LOW_AREAS);

	/* Only act on segments not already open. */
	newareas &= ~(mm->context.low_htlb_areas);
	if (! newareas)
		return 0; /* The segments we want are already open */

	for (i = 0; i < NUM_LOW_AREAS; i++)
		if ((1 << i) & newareas)
			if (prepare_low_area_for_htlb(mm, i) != 0)
				return -EBUSY;

	mm->context.low_htlb_areas |= newareas;

	/* update the paca copy of the context struct */
	get_paca()->context = mm->context;

	/* the context change must make it to memory before the flush,
	 * so that further SLB misses do the right thing. */
	mb();
	/* Synchronous (wait=1) cross-CPU SLB flush of the opened segments. */
	on_each_cpu(flush_low_segments, (void *)(unsigned long)newareas, 0, 1);

	return 0;
}

/*
 * High-area counterpart of open_low_hpage_areas(): same verify / record /
 * paca-update / cross-CPU-flush sequence for high hugepage areas.
 */
static int open_high_hpage_areas(struct mm_struct *mm, u16 newareas)
{
	unsigned long i;

	BUILD_BUG_ON((sizeof(newareas)*8) != NUM_HIGH_AREAS);
	BUILD_BUG_ON((sizeof(mm->context.high_htlb_areas)*8)
		     != NUM_HIGH_AREAS);

	newareas &= ~(mm->context.high_htlb_areas);
	if (! newareas)
		return 0; /* The areas we want are already open */

	for (i = 0; i < NUM_HIGH_AREAS; i++)
		if ((1 << i) & newareas)
			if (prepare_high_area_for_htlb(mm, i) != 0)
				return -EBUSY;

	mm->context.high_htlb_areas |= newareas;

	/* update the paca copy of the context struct */
	get_paca()->context = mm->context;

	/* the context change must make it to memory before the flush,
	 * so that further SLB misses do the right thing. */
	mb();
	on_each_cpu(flush_high_segments, (void *)(unsigned long)newareas, 0, 1);

	return 0;
}

/*
 * Prepare [addr, addr+len) for a hugepage mapping by opening the covering
 * low or high areas in the current mm.  Rejects ranges that wrap around
 * (-EINVAL); ranges ending below 4GB go through the low-area path, the
 * rest through the high-area path.  Returns 0 or a negative errno.
 */
int prepare_hugepage_range(unsigned long addr, unsigned long len)
{
	int err;

	/* Reject address-space wrap-around. */
	if ( (addr+len) < addr )
		return -EINVAL;

	if ((addr + len) < 0x100000000UL)
		err = open_low_hpage_areas(current->mm,
					  LOW_ESID_MASK(addr, len));
	else
		err = open_high_hpage_areas(current->mm,
					    HTLB_AREA_MASK(addr, len));
	if (err) {
		printk(KERN_DEBUG "prepare_hugepage_range(%lx, %lx)"
		       " failed (lowmask: 0x%04hx, highmask: 0x%04hx)\n",
		       addr, len,
		       LOW_ESID_MASK(addr, len), HTLB_AREA_MASK(addr, len));
		return err;
	}

	return 0;
}

/*
 * Resolve @address to its struct page for follow_page()-style walkers.
 * Returns ERR_PTR(-EINVAL) for addresses outside the hugepage region,
 * otherwise the sub-page within the huge page.
 *
 * NOTE(review): huge_pte_offset() can return NULL when the pgd/pud level
 * is unpopulated, and *ptep would then oops — presumably callers only
 * reach this with a populated mapping; TODO confirm that guarantee.
 */
struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
{
	pte_t *ptep;
	struct page *page;

	if (! in_hugepage_area(mm->context, address))
		return ERR_PTR(-EINVAL);

	ptep = huge_pte_offset(mm, address);
	page = pte_page(*ptep);
	if (page)
		/* Step to the 4K sub-page within the huge page. */
		page += (address % HPAGE_SIZE) / PAGE_SIZE;

	return page;
}

/* Generic page-table walkers never see a "huge pmd" on this port —
 * hugepages are confined to the dedicated region handled above. */
int pmd_huge(pmd_t pmd)
{
	return 0;
}

/* Unreachable on this port (pmd_huge() is always 0); BUG if ever called. */
struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
		pmd_t *pmd, int write)
{
	BUG();
	return NULL;
}

/* Because we have an exclusive hugepage region which lies within the
 * normal user address space, we have to take special measures to make
 * non-huge mmap()s evade the hugepage reserved regions.
*/unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,				     unsigned long len, unsigned long pgoff,				     unsigned long flags){	struct mm_struct *mm = current->mm;	struct vm_area_struct *vma;	unsigned long start_addr;	if (len > TASK_SIZE)		return -ENOMEM;	if (addr) {		addr = PAGE_ALIGN(addr);		vma = find_vma(mm, addr);		if (((TASK_SIZE - len) >= addr)		    && (!vma || (addr+len) <= vma->vm_start)		    && !is_hugepage_only_range(mm, addr,len))			return addr;	}	if (len > mm->cached_hole_size) {	        start_addr = addr = mm->free_area_cache;	} else {	        start_addr = addr = TASK_UNMAPPED_BASE;	        mm->cached_hole_size = 0;	}full_search:	vma = find_vma(mm, addr);	while (TASK_SIZE - len >= addr) {		BUG_ON(vma && (addr >= vma->vm_end));		if (touches_hugepage_low_range(mm, addr, len)) {			addr = ALIGN(addr+1, 1<<SID_SHIFT);			vma = find_vma(mm, addr);			continue;		}		if (touches_hugepage_high_range(mm, addr, len)) {			addr = ALIGN(addr+1, 1UL<<HTLB_AREA_SHIFT);			vma = find_vma(mm, addr);			continue;		}		if (!vma || addr + len <= vma->vm_start) {			/*			 * Remember the place where we stopped the search:			 */			mm->free_area_cache = addr + len;			return addr;		}		if (addr + mm->cached_hole_size < vma->vm_start)		        mm->cached_hole_size = vma->vm_start - addr;		addr = vma->vm_end;

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?