📄 hugetlbpage.c
/*
 * PPC64 (POWER4) Huge TLB Page Support for Kernel.
 *
 * Copyright (C) 2003 David Gibson, IBM Corporation.
 *
 * Based on the IA-32 version:
 * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com>
 */

#include <linux/init.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/smp_lock.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/sysctl.h>
#include <asm/mman.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/machdep.h>
#include <asm/cputable.h>
#include <asm/tlb.h>
#include <linux/sysctl.h>

#define NUM_LOW_AREAS	(0x100000000UL >> SID_SHIFT)
#define NUM_HIGH_AREAS	(PGTABLE_RANGE >> HTLB_AREA_SHIFT)

/* Modelled after find_linux_pte() */
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pg;
	pud_t *pu;
	pmd_t *pm;
	pte_t *pt;

	BUG_ON(! in_hugepage_area(mm->context, addr));

	addr &= HPAGE_MASK;

	pg = pgd_offset(mm, addr);
	if (!pgd_none(*pg)) {
		pu = pud_offset(pg, addr);
		if (!pud_none(*pu)) {
			pm = pmd_offset(pu, addr);
#ifdef CONFIG_PPC_64K_PAGES
			/* Currently, we use the normal PTE offset within full
			 * size PTE pages, thus our huge PTEs are scattered in
			 * the PTE page and we do waste some. We may change
			 * that in the future, but the current mechanism keeps
			 * things much simpler */
			if (!pmd_none(*pm)) {
				/* Note: pte_offset_* are all equivalent on
				 * ppc64 as we don't have HIGHMEM */
				pt = pte_offset_kernel(pm, addr);
				return pt;
			}
#else /* CONFIG_PPC_64K_PAGES */
			/* On 4k pages, we put huge PTEs in the PMD page */
			pt = (pte_t *)pm;
			return pt;
#endif /* CONFIG_PPC_64K_PAGES */
		}
	}

	return NULL;
}

pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pg;
	pud_t *pu;
	pmd_t *pm;
	pte_t *pt;

	BUG_ON(! in_hugepage_area(mm->context, addr));

	addr &= HPAGE_MASK;

	pg = pgd_offset(mm, addr);
	pu = pud_alloc(mm, pg, addr);

	if (pu) {
		pm = pmd_alloc(mm, pu, addr);
		if (pm) {
#ifdef CONFIG_PPC_64K_PAGES
			/* See comment in huge_pte_offset. Note that if we ever
			 * want to put the page size in the PMD, we would have
			 * to open code our own pte_alloc* function in order
			 * to populate and set the size atomically */
			pt = pte_alloc_map(mm, pm, addr);
#else /* CONFIG_PPC_64K_PAGES */
			pt = (pte_t *)pm;
#endif /* CONFIG_PPC_64K_PAGES */
			return pt;
		}
	}

	return NULL;
}

void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, pte_t pte)
{
	if (pte_present(*ptep)) {
		/* We open-code pte_clear because we need to pass the right
		 * argument to hpte_update (huge / !huge) */
		unsigned long old = pte_update(ptep, ~0UL);
		if (old & _PAGE_HASHPTE)
			hpte_update(mm, addr & HPAGE_MASK, ptep, old, 1);
		flush_tlb_pending();
	}
	*ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
}

pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
			      pte_t *ptep)
{
	unsigned long old = pte_update(ptep, ~0UL);

	if (old & _PAGE_HASHPTE)
		hpte_update(mm, addr & HPAGE_MASK, ptep, old, 1);
	*ptep = __pte(0);

	return __pte(old);
}

/*
 * This function checks for proper alignment of input addr and len parameters.
 */
int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
{
	if (len & ~HPAGE_MASK)
		return -EINVAL;
	if (addr & ~HPAGE_MASK)
		return -EINVAL;
	if (! (within_hugepage_low_range(addr, len)
	       || within_hugepage_high_range(addr, len)))
		return -EINVAL;
	return 0;
}

struct slb_flush_info {
	struct mm_struct *mm;
	u16 newareas;
};

static void flush_low_segments(void *parm)
{
	struct slb_flush_info *fi = parm;
	unsigned long i;

	BUILD_BUG_ON((sizeof(fi->newareas)*8) != NUM_LOW_AREAS);

	if (current->active_mm != fi->mm)
		return;

	/* Only need to do anything if this CPU is working in the same
	 * mm as the one which has changed */

	/* update the paca copy of the context struct */
	get_paca()->context = current->active_mm->context;

	asm volatile("isync" : : : "memory");
	for (i = 0; i < NUM_LOW_AREAS; i++) {
		if (! (fi->newareas & (1U << i)))
			continue;
		asm volatile("slbie %0"
			     : : "r" ((i << SID_SHIFT) | SLBIE_C));
	}
	asm volatile("isync" : : : "memory");
}

static void flush_high_segments(void *parm)
{
	struct slb_flush_info *fi = parm;
	unsigned long i, j;

	BUILD_BUG_ON((sizeof(fi->newareas)*8) != NUM_HIGH_AREAS);

	if (current->active_mm != fi->mm)
		return;

	/* Only need to do anything if this CPU is working in the same
	 * mm as the one which has changed */

	/* update the paca copy of the context struct */
	get_paca()->context = current->active_mm->context;

	asm volatile("isync" : : : "memory");
	for (i = 0; i < NUM_HIGH_AREAS; i++) {
		if (! (fi->newareas & (1U << i)))
			continue;
		for (j = 0; j < (1UL << (HTLB_AREA_SHIFT-SID_SHIFT)); j++)
			asm volatile("slbie %0"
				     :: "r" (((i << HTLB_AREA_SHIFT)
					      + (j << SID_SHIFT)) | SLBIE_C));
	}
	asm volatile("isync" : : : "memory");
}

static int prepare_low_area_for_htlb(struct mm_struct *mm, unsigned long area)
{
	unsigned long start = area << SID_SHIFT;
	unsigned long end = (area+1) << SID_SHIFT;
	struct vm_area_struct *vma;

	BUG_ON(area >= NUM_LOW_AREAS);

	/* Check no VMAs are in the region */
	vma = find_vma(mm, start);
	if (vma && (vma->vm_start < end))
		return -EBUSY;

	return 0;
}

static int prepare_high_area_for_htlb(struct mm_struct *mm, unsigned long area)
{
	unsigned long start = area << HTLB_AREA_SHIFT;
	unsigned long end = (area+1) << HTLB_AREA_SHIFT;
	struct vm_area_struct *vma;

	BUG_ON(area >= NUM_HIGH_AREAS);

	/* Hack, so that each address is controlled by exactly one
	 * of the high or low area bitmaps, the first high area starts
	 * at 4GB, not 0 */
	if (start == 0)
		start = 0x100000000UL;

	/* Check no VMAs are in the region */
	vma = find_vma(mm, start);
	if (vma && (vma->vm_start < end))
		return -EBUSY;

	return 0;
}

static int open_low_hpage_areas(struct mm_struct *mm, u16 newareas)
{
	unsigned long i;
	struct slb_flush_info fi;

	BUILD_BUG_ON((sizeof(newareas)*8) != NUM_LOW_AREAS);
	BUILD_BUG_ON((sizeof(mm->context.low_htlb_areas)*8) != NUM_LOW_AREAS);

	newareas &= ~(mm->context.low_htlb_areas);
	if (! newareas)
		return 0; /* The segments we want are already open */

	for (i = 0; i < NUM_LOW_AREAS; i++)
		if ((1 << i) & newareas)
			if (prepare_low_area_for_htlb(mm, i) != 0)
				return -EBUSY;

	mm->context.low_htlb_areas |= newareas;

	/* the context change must make it to memory before the flush,
	 * so that further SLB misses do the right thing. */
	mb();

	fi.mm = mm;
	fi.newareas = newareas;
	on_each_cpu(flush_low_segments, &fi, 0, 1);

	return 0;
}

static int open_high_hpage_areas(struct mm_struct *mm, u16 newareas)
{
	struct slb_flush_info fi;
	unsigned long i;

	BUILD_BUG_ON((sizeof(newareas)*8) != NUM_HIGH_AREAS);
	BUILD_BUG_ON((sizeof(mm->context.high_htlb_areas)*8) != NUM_HIGH_AREAS);

	newareas &= ~(mm->context.high_htlb_areas);
	if (! newareas)
		return 0; /* The areas we want are already open */

	for (i = 0; i < NUM_HIGH_AREAS; i++)
		if ((1 << i) & newareas)
			if (prepare_high_area_for_htlb(mm, i) != 0)
				return -EBUSY;

	mm->context.high_htlb_areas |= newareas;

	/* update the paca copy of the context struct */
	get_paca()->context = mm->context;

	/* the context change must make it to memory before the flush,
	 * so that further SLB misses do the right thing. */
	mb();

	fi.mm = mm;
	fi.newareas = newareas;
	on_each_cpu(flush_high_segments, &fi, 0, 1);

	return 0;
}

int prepare_hugepage_range(unsigned long addr, unsigned long len)
{
	int err = 0;

	if ( (addr+len) < addr )
		return -EINVAL;

	if (addr < 0x100000000UL)
		err = open_low_hpage_areas(current->mm,
					   LOW_ESID_MASK(addr, len));
	if ((addr + len) > 0x100000000UL)
		err = open_high_hpage_areas(current->mm,
					    HTLB_AREA_MASK(addr, len));
	if (err) {
		printk(KERN_DEBUG "prepare_hugepage_range(%lx, %lx)"
			" failed (lowmask: 0x%04hx, highmask: 0x%04hx)\n",
			addr, len,
			LOW_ESID_MASK(addr, len), HTLB_AREA_MASK(addr, len));
		return err;
	}

	return 0;
}

struct page *
follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
{
	pte_t *ptep;
	struct page *page;

	if (! in_hugepage_area(mm->context, address))
		return ERR_PTR(-EINVAL);

	ptep = huge_pte_offset(mm, address);
	page = pte_page(*ptep);
	if (page)
		page += (address % HPAGE_SIZE) / PAGE_SIZE;

	return page;
}

int pmd_huge(pmd_t pmd)
{
	return 0;
}

struct page *
follow_huge_pmd(struct mm_struct *mm, unsigned long address,
		pmd_t *pmd, int write)
{
	BUG();
	return NULL;
}

/* Because we have an exclusive hugepage region which lies within the
 * normal user address space, we have to take special measures to make
 * non-huge mmap()s evade the hugepage reserved regions. */
unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
				     unsigned long len, unsigned long pgoff,
				     unsigned long flags)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long start_addr;

	if (len > TASK_SIZE)
		return -ENOMEM;

	if (addr) {
		addr = PAGE_ALIGN(addr);
		vma = find_vma(mm, addr);
		if (((TASK_SIZE - len) >= addr)
		    && (!vma || (addr+len) <= vma->vm_start)
		    && !is_hugepage_only_range(mm, addr,len))
			return addr;
	}
	if (len > mm->cached_hole_size) {
		start_addr = addr = mm->free_area_cache;
	} else {
		start_addr = addr = TASK_UNMAPPED_BASE;
		mm->cached_hole_size = 0;
	}

full_search:
	vma = find_vma(mm, addr);
	while (TASK_SIZE - len >= addr) {
		BUG_ON(vma && (addr >= vma->vm_end));

		if (touches_hugepage_low_range(mm, addr, len)) {
			addr = ALIGN(addr+1, 1<<SID_SHIFT);
			vma = find_vma(mm, addr);
			continue;
		}
		if (touches_hugepage_high_range(mm, addr, len)) {
			addr = ALIGN(addr+1, 1UL<<HTLB_AREA_SHIFT);
			vma = find_vma(mm, addr);
			continue;
		}
		if (!vma || addr + len <= vma->vm_start) {
			/*
			 * Remember the place where we stopped the search:
			 */
			mm->free_area_cache = addr + len;
			return addr;
		}
		if (addr + mm->cached_hole_size < vma->vm_start)
			mm->cached_hole_size = vma->vm_start - addr;
		addr = vma->vm_end;
		vma = vma->vm_next;
	}

	/* Make sure we didn't miss any holes */
	if (start_addr != TASK_UNMAPPED_BASE) {
		start_addr = addr = TASK_UNMAPPED_BASE;
		goto full_search;
	}
	return -ENOMEM;
}
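For context: the hooks above are driven by the generic hugetlbfs code. When a process mmap()s a file on a hugetlbfs mount, prepare_hugepage_range() opens the 256MB segments below 4GB (or the high areas above it) covering the mapping, arch_get_unmapped_area() keeps ordinary mappings out of those areas, and the generic hugetlb code then installs the large pages through huge_pte_alloc() and set_huge_pte_at(). The snippet below is a minimal editor-supplied user-space sketch of that path, not part of hugetlbpage.c; it assumes a hugetlbfs mount at /mnt/huge (hypothetical path), a 16MB huge page size, and that huge pages have already been reserved via /proc/sys/vm/nr_hugepages.

/* Editor's sketch (not from the kernel tree): map one huge page through
 * hugetlbfs.  Assumes /mnt/huge is a hugetlbfs mount (hypothetical) and
 * a 16MB huge page size.  The mmap() below is what reaches the ppc64
 * prepare_hugepage_range() path in this file. */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

#define HUGE_FILE "/mnt/huge/example"	/* hypothetical hugetlbfs file */
#define MAP_LEN   (16UL * 1024 * 1024)	/* length of one 16MB huge page */

int main(void)
{
	int fd = open(HUGE_FILE, O_CREAT | O_RDWR, 0700);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	void *p = mmap(NULL, MAP_LEN, PROT_READ | PROT_WRITE,
		       MAP_SHARED, fd, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		close(fd);
		return 1;
	}

	memset(p, 0, MAP_LEN);	/* touch the mapping so it is backed by a huge page */

	munmap(p, MAP_LEN);
	close(fd);
	unlink(HUGE_FILE);
	return 0;
}

Note that because arch_get_unmapped_area() skips every segment or high area whose bit has been set in the context bitmaps, later non-huge mmap()s in the same process are never placed inside the regions that prepare_hugepage_range() opened.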