📄 hugetlbpage.c
/*
 * PPC64 (POWER4) Huge TLB Page Support for Kernel.
 *
 * Copyright (C) 2003 David Gibson, IBM Corporation.
 *
 * Based on the IA-32 version:
 * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com>
 */

#include <linux/init.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/smp_lock.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/sysctl.h>
#include <asm/mman.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/machdep.h>
#include <asm/cputable.h>
#include <asm/tlb.h>
#include <linux/sysctl.h>

#define NUM_LOW_AREAS	(0x100000000UL >> SID_SHIFT)
#define NUM_HIGH_AREAS	(PGTABLE_RANGE >> HTLB_AREA_SHIFT)

/* Modelled after find_linux_pte() */
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pg;
	pud_t *pu;
	pmd_t *pm;
	pte_t *pt;

	BUG_ON(! in_hugepage_area(mm->context, addr));

	addr &= HPAGE_MASK;

	pg = pgd_offset(mm, addr);
	if (!pgd_none(*pg)) {
		pu = pud_offset(pg, addr);
		if (!pud_none(*pu)) {
			pm = pmd_offset(pu, addr);
#ifdef CONFIG_PPC_64K_PAGES
			/* Currently, we use the normal PTE offset within full
			 * size PTE pages, thus our huge PTEs are scattered in
			 * the PTE page and we do waste some. We may change
			 * that in the future, but the current mechanism keeps
			 * things much simpler */
			if (!pmd_none(*pm)) {
				/* Note: pte_offset_* are all equivalent on
				 * ppc64 as we don't have HIGHMEM */
				pt = pte_offset_kernel(pm, addr);
				return pt;
			}
#else /* CONFIG_PPC_64K_PAGES */
			/* On 4k pages, we put huge PTEs in the PMD page */
			pt = (pte_t *)pm;
			return pt;
#endif /* CONFIG_PPC_64K_PAGES */
		}
	}

	return NULL;
}

pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pg;
	pud_t *pu;
	pmd_t *pm;
	pte_t *pt;

	BUG_ON(! in_hugepage_area(mm->context, addr));

	addr &= HPAGE_MASK;

	pg = pgd_offset(mm, addr);
	pu = pud_alloc(mm, pg, addr);

	if (pu) {
		pm = pmd_alloc(mm, pu, addr);
		if (pm) {
#ifdef CONFIG_PPC_64K_PAGES
			/* See comment in huge_pte_offset. Note that if we ever
			 * want to put the page size in the PMD, we would have
			 * to open code our own pte_alloc* function in order
			 * to populate and set the size atomically */
			pt = pte_alloc_map(mm, pm, addr);
#else /* CONFIG_PPC_64K_PAGES */
			pt = (pte_t *)pm;
#endif /* CONFIG_PPC_64K_PAGES */
			return pt;
		}
	}

	return NULL;
}

void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, pte_t pte)
{
	if (pte_present(*ptep)) {
		/* We open-code pte_clear because we need to pass the right
		 * argument to hpte_update (huge / !huge) */
		unsigned long old = pte_update(ptep, ~0UL);
		if (old & _PAGE_HASHPTE)
			hpte_update(mm, addr & HPAGE_MASK, ptep, old, 1);
		flush_tlb_pending();
	}
	*ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
}

pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
			      pte_t *ptep)
{
	unsigned long old = pte_update(ptep, ~0UL);

	if (old & _PAGE_HASHPTE)
		hpte_update(mm, addr & HPAGE_MASK, ptep, old, 1);
	*ptep = __pte(0);

	return __pte(old);
}

/*
 * This function checks for proper alignment of input addr and len parameters.
 */
int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
{
	if (len & ~HPAGE_MASK)
		return -EINVAL;
	if (addr & ~HPAGE_MASK)
		return -EINVAL;
	if (! (within_hugepage_low_range(addr, len)
	       || within_hugepage_high_range(addr, len)))
		return -EINVAL;
	return 0;
}

struct slb_flush_info {
	struct mm_struct *mm;
	u16 newareas;
};

static void flush_low_segments(void *parm)
{
	struct slb_flush_info *fi = parm;
	unsigned long i;

	BUILD_BUG_ON((sizeof(fi->newareas)*8) != NUM_LOW_AREAS);

	if (current->active_mm != fi->mm)
		return;

	/* Only need to do anything if this CPU is working in the same
	 * mm as the one which has changed */

	/* update the paca copy of the context struct */
	get_paca()->context = current->active_mm->context;

	asm volatile("isync" : : : "memory");
	for (i = 0; i < NUM_LOW_AREAS; i++) {
		if (! (fi->newareas & (1U << i)))
			continue;
		asm volatile("slbie %0"
			     : : "r" ((i << SID_SHIFT) | SLBIE_C));
	}
	asm volatile("isync" : : : "memory");
}

static void flush_high_segments(void *parm)
{
	struct slb_flush_info *fi = parm;
	unsigned long i, j;

	BUILD_BUG_ON((sizeof(fi->newareas)*8) != NUM_HIGH_AREAS);

	if (current->active_mm != fi->mm)
		return;

	/* Only need to do anything if this CPU is working in the same
	 * mm as the one which has changed */

	/* update the paca copy of the context struct */
	get_paca()->context = current->active_mm->context;

	asm volatile("isync" : : : "memory");
	for (i = 0; i < NUM_HIGH_AREAS; i++) {
		if (! (fi->newareas & (1U << i)))
			continue;
		for (j = 0; j < (1UL << (HTLB_AREA_SHIFT-SID_SHIFT)); j++)
			asm volatile("slbie %0"
				     :: "r" (((i << HTLB_AREA_SHIFT)
					      + (j << SID_SHIFT)) | SLBIE_C));
	}
	asm volatile("isync" : : : "memory");
}

static int prepare_low_area_for_htlb(struct mm_struct *mm, unsigned long area)
{
	unsigned long start = area << SID_SHIFT;
	unsigned long end = (area+1) << SID_SHIFT;
	struct vm_area_struct *vma;

	BUG_ON(area >= NUM_LOW_AREAS);

	/* Check no VMAs are in the region */
	vma = find_vma(mm, start);
	if (vma && (vma->vm_start < end))
		return -EBUSY;

	return 0;
}

static int prepare_high_area_for_htlb(struct mm_struct *mm, unsigned long area)
{
	unsigned long start = area << HTLB_AREA_SHIFT;
	unsigned long end = (area+1) << HTLB_AREA_SHIFT;
	struct vm_area_struct *vma;

	BUG_ON(area >= NUM_HIGH_AREAS);

	/* Hack, so that each address is controlled by exactly one
	 * of the high or low area bitmaps, the first high area starts
	 * at 4GB, not 0 */
	if (start == 0)
		start = 0x100000000UL;

	/* Check no VMAs are in the region */
	vma = find_vma(mm, start);
	if (vma && (vma->vm_start < end))
		return -EBUSY;

	return 0;
}

static int open_low_hpage_areas(struct mm_struct *mm, u16 newareas)
{
	unsigned long i;
	struct slb_flush_info fi;

	BUILD_BUG_ON((sizeof(newareas)*8) != NUM_LOW_AREAS);
	BUILD_BUG_ON((sizeof(mm->context.low_htlb_areas)*8) != NUM_LOW_AREAS);

	newareas &= ~(mm->context.low_htlb_areas);
	if (! newareas)
		return 0; /* The segments we want are already open */

	for (i = 0; i < NUM_LOW_AREAS; i++)
		if ((1 << i) & newareas)
			if (prepare_low_area_for_htlb(mm, i) != 0)
				return -EBUSY;

	mm->context.low_htlb_areas |= newareas;

	/* the context change must make it to memory before the flush,
	 * so that further SLB misses do the right thing. */
	mb();

	fi.mm = mm;
	fi.newareas = newareas;
	on_each_cpu(flush_low_segments, &fi, 0, 1);

	return 0;
}

static int open_high_hpage_areas(struct mm_struct *mm, u16 newareas)
{
	struct slb_flush_info fi;
	unsigned long i;

	BUILD_BUG_ON((sizeof(newareas)*8) != NUM_HIGH_AREAS);
	BUILD_BUG_ON((sizeof(mm->context.high_htlb_areas)*8) != NUM_HIGH_AREAS);

	newareas &= ~(mm->context.high_htlb_areas);
	if (! newareas)
		return 0; /* The areas we want are already open */

	for (i = 0; i < NUM_HIGH_AREAS; i++)
		if ((1 << i) & newareas)
			if (prepare_high_area_for_htlb(mm, i) != 0)
				return -EBUSY;

	mm->context.high_htlb_areas |= newareas;

	/* update the paca copy of the context struct */
	get_paca()->context = mm->context;

	/* the context change must make it to memory before the flush,
	 * so that further SLB misses do the right thing. */
	mb();

	fi.mm = mm;
	fi.newareas = newareas;
	on_each_cpu(flush_high_segments, &fi, 0, 1);

	return 0;
}

int prepare_hugepage_range(unsigned long addr, unsigned long len)
{
	int err = 0;

	if ( (addr+len) < addr )
		return -EINVAL;

	if (addr < 0x100000000UL)
		err = open_low_hpage_areas(current->mm,
					   LOW_ESID_MASK(addr, len));
	if ((addr + len) > 0x100000000UL)
		err = open_high_hpage_areas(current->mm,
					    HTLB_AREA_MASK(addr, len));
	if (err) {
		printk(KERN_DEBUG "prepare_hugepage_range(%lx, %lx)"
			" failed (lowmask: 0x%04hx, highmask: 0x%04hx)\n",
			addr, len,
			LOW_ESID_MASK(addr, len), HTLB_AREA_MASK(addr, len));
		return err;
	}

	return 0;
}

struct page *
follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
{
	pte_t *ptep;
	struct page *page;

	if (! in_hugepage_area(mm->context, address))
		return ERR_PTR(-EINVAL);

	ptep = huge_pte_offset(mm, address);
	page = pte_page(*ptep);
	if (page)
		page += (address % HPAGE_SIZE) / PAGE_SIZE;

	return page;
}

int pmd_huge(pmd_t pmd)
{
	return 0;
}

struct page *
follow_huge_pmd(struct mm_struct *mm, unsigned long address,
		pmd_t *pmd, int write)
{
	BUG();
	return NULL;
}

/* Because we have an exclusive hugepage region which lies within the
 * normal user address space, we have to take special measures to make
 * non-huge mmap()s evade the hugepage reserved regions. */
unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
				     unsigned long len, unsigned long pgoff,
				     unsigned long flags)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long start_addr;

	if (len > TASK_SIZE)
		return -ENOMEM;

	if (addr) {
		addr = PAGE_ALIGN(addr);
		vma = find_vma(mm, addr);
		if (((TASK_SIZE - len) >= addr)
		    && (!vma || (addr+len) <= vma->vm_start)
		    && !is_hugepage_only_range(mm, addr,len))
			return addr;
	}
	if (len > mm->cached_hole_size) {
		start_addr = addr = mm->free_area_cache;
	} else {
		start_addr = addr = TASK_UNMAPPED_BASE;
		mm->cached_hole_size = 0;
	}

full_search:
	vma = find_vma(mm, addr);
	while (TASK_SIZE - len >= addr) {
		BUG_ON(vma && (addr >= vma->vm_end));

		if (touches_hugepage_low_range(mm, addr, len)) {
			addr = ALIGN(addr+1, 1<<SID_SHIFT);
			vma = find_vma(mm, addr);
			continue;
		}
		if (touches_hugepage_high_range(mm, addr, len)) {
			addr = ALIGN(addr+1, 1UL<<HTLB_AREA_SHIFT);
			vma = find_vma(mm, addr);
			continue;
		}
		if (!vma || addr + len <= vma->vm_start) {
			/*
			 * Remember the place where we stopped the search:
			 */
			mm->free_area_cache = addr + len;
			return addr;
		}
		if (addr + mm->cached_hole_size < vma->vm_start)
			mm->cached_hole_size = vma->vm_start - addr;
		addr = vma->vm_end;
		vma = vma->vm_next;
	}

	/* Make sure we didn't miss any holes */
	if (start_addr != TASK_UNMAPPED_BASE) {
		start_addr = addr = TASK_UNMAPPED_BASE;
		goto full_search;
	}
	return -ENOMEM;
}
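For context: the hooks above are driven by the generic hugetlbfs code. When a process mmap()s a file on a hugetlbfs mount, prepare_hugepage_range() opens the 256MB segments below 4GB (or the high areas above it) covering the mapping, arch_get_unmapped_area() keeps ordinary mappings out of those areas, and the generic hugetlb code then installs the large pages through huge_pte_alloc() and set_huge_pte_at(). The snippet below is a minimal editor-supplied user-space sketch of that path, not part of hugetlbpage.c; it assumes a hugetlbfs mount at /mnt/huge (hypothetical path), a 16MB huge page size, and that huge pages have already been reserved via /proc/sys/vm/nr_hugepages.

/* Editor's sketch (not from the kernel tree): map one huge page through
 * hugetlbfs.  Assumes /mnt/huge is a hugetlbfs mount (hypothetical) and
 * a 16MB huge page size.  The mmap() below is what reaches the ppc64
 * prepare_hugepage_range() path in this file. */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

#define HUGE_FILE "/mnt/huge/example"	/* hypothetical hugetlbfs file */
#define MAP_LEN   (16UL * 1024 * 1024)	/* length of one 16MB huge page */

int main(void)
{
	int fd = open(HUGE_FILE, O_CREAT | O_RDWR, 0700);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	void *p = mmap(NULL, MAP_LEN, PROT_READ | PROT_WRITE,
		       MAP_SHARED, fd, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		close(fd);
		return 1;
	}

	memset(p, 0, MAP_LEN);	/* touch the mapping so it is backed by a huge page */

	munmap(p, MAP_LEN);
	close(fd);
	unlink(HUGE_FILE);
	return 0;
}

Note that because arch_get_unmapped_area() skips every segment or high area whose bit has been set in the context bitmaps, later non-huge mmap()s in the same process are never placed inside the regions that prepare_hugepage_range() opened.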