page_alloc.c
	if (!zone->spanned_pages)
		return;

	spin_lock_irqsave(&zone->lock, flags);

	max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
	for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
		if (pfn_valid(pfn)) {
			struct page *page = pfn_to_page(pfn);

			if (!swsusp_page_is_forbidden(page))
				swsusp_unset_page_free(page);
		}

	for_each_migratetype_order(order, t) {
		list_for_each(curr, &zone->free_area[order].free_list[t]) {
			unsigned long i;

			pfn = page_to_pfn(list_entry(curr, struct page, lru));
			for (i = 0; i < (1UL << order); i++)
				swsusp_set_page_free(pfn_to_page(pfn + i));
		}
	}
	spin_unlock_irqrestore(&zone->lock, flags);
}
#endif /* CONFIG_PM */

/*
 * Free a 0-order page
 */
static void free_hot_cold_page(struct page *page, int cold)
{
	struct zone *zone = page_zone(page);
	struct per_cpu_pages *pcp;
	unsigned long flags;

	if (PageAnon(page))
		page->mapping = NULL;
	if (free_pages_check(page))
		return;

	if (!PageHighMem(page)) {
		debug_check_no_locks_freed(page_address(page), PAGE_SIZE);
		debug_check_no_obj_freed(page_address(page), PAGE_SIZE);
	}
	arch_free_page(page, 0);
	kernel_map_pages(page, 1, 0);

	pcp = &zone_pcp(zone, get_cpu())->pcp;
	local_irq_save(flags);
	__count_vm_event(PGFREE);
	if (cold)
		list_add_tail(&page->lru, &pcp->list);
	else
		list_add(&page->lru, &pcp->list);
	set_page_private(page, get_pageblock_migratetype(page));
	pcp->count++;
	if (pcp->count >= pcp->high) {
		free_pages_bulk(zone, pcp->batch, &pcp->list, 0);
		pcp->count -= pcp->batch;
	}
	local_irq_restore(flags);
	put_cpu();
}

void free_hot_page(struct page *page)
{
	free_hot_cold_page(page, 0);
}

void free_cold_page(struct page *page)
{
	free_hot_cold_page(page, 1);
}

/*
 * split_page takes a non-compound higher-order page, and splits it into
 * n (1<<order) sub-pages: page[0..n]
 * Each sub-page must be freed individually.
 *
 * Note: this is probably too low level an operation for use in drivers.
 * Please consult with lkml before using this in your driver.
 */
void split_page(struct page *page, unsigned int order)
{
	int i;

	VM_BUG_ON(PageCompound(page));
	VM_BUG_ON(!page_count(page));
	for (i = 1; i < (1 << order); i++)
		set_page_refcounted(page + i);
}
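
/*
 * Illustrative sketch (not part of the original file): a hypothetical
 * caller of split_page().  It allocates a non-compound order-2 block,
 * splits it into four independently refcounted 0-order pages, and frees
 * one of them on its own, as the comment above requires.  The function
 * name is made up for the example.
 */
static struct page *split_page_example(void)
{
	/* four contiguous pages; no __GFP_COMP, so not a compound page */
	struct page *page = alloc_pages(GFP_KERNEL, 2);

	if (!page)
		return NULL;

	split_page(page, 2);	/* page[0..3] now have their own refcounts */
	__free_page(page + 3);	/* sub-pages may be freed individually */

	return page;		/* caller still owns page[0..2] */
}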

/*
 * Really, prep_compound_page() should be called from __rmqueue_bulk().  But
 * we cheat by calling it from here, in the order > 0 path.  Saves a branch
 * or two.
 */
static struct page *buffered_rmqueue(struct zone *preferred_zone,
			struct zone *zone, int order, gfp_t gfp_flags)
{
	unsigned long flags;
	struct page *page;
	int cold = !!(gfp_flags & __GFP_COLD);
	int cpu;
	int migratetype = allocflags_to_migratetype(gfp_flags);

again:
	cpu = get_cpu();
	if (likely(order == 0)) {
		struct per_cpu_pages *pcp;

		pcp = &zone_pcp(zone, cpu)->pcp;
		local_irq_save(flags);
		if (!pcp->count) {
			pcp->count = rmqueue_bulk(zone, 0,
					pcp->batch, &pcp->list, migratetype);
			if (unlikely(!pcp->count))
				goto failed;
		}

		/* Find a page of the appropriate migrate type */
		if (cold) {
			list_for_each_entry_reverse(page, &pcp->list, lru)
				if (page_private(page) == migratetype)
					break;
		} else {
			list_for_each_entry(page, &pcp->list, lru)
				if (page_private(page) == migratetype)
					break;
		}

		/* Allocate more to the pcp list if necessary */
		if (unlikely(&page->lru == &pcp->list)) {
			pcp->count += rmqueue_bulk(zone, 0,
					pcp->batch, &pcp->list, migratetype);
			page = list_entry(pcp->list.next, struct page, lru);
		}

		list_del(&page->lru);
		pcp->count--;
	} else {
		spin_lock_irqsave(&zone->lock, flags);
		page = __rmqueue(zone, order, migratetype);
		spin_unlock(&zone->lock);
		if (!page)
			goto failed;
	}

	__count_zone_vm_events(PGALLOC, zone, 1 << order);
	zone_statistics(preferred_zone, zone);
	local_irq_restore(flags);
	put_cpu();

	VM_BUG_ON(bad_range(zone, page));
	if (prep_new_page(page, order, gfp_flags))
		goto again;
	return page;

failed:
	local_irq_restore(flags);
	put_cpu();
	return NULL;
}

#define ALLOC_NO_WATERMARKS	0x01 /* don't check watermarks at all */
#define ALLOC_WMARK_MIN		0x02 /* use pages_min watermark */
#define ALLOC_WMARK_LOW		0x04 /* use pages_low watermark */
#define ALLOC_WMARK_HIGH	0x08 /* use pages_high watermark */
#define ALLOC_HARDER		0x10 /* try to alloc harder */
#define ALLOC_HIGH		0x20 /* __GFP_HIGH set */
#define ALLOC_CPUSET		0x40 /* check for correct cpuset */

#ifdef CONFIG_FAIL_PAGE_ALLOC

static struct fail_page_alloc_attr {
	struct fault_attr attr;

	u32 ignore_gfp_highmem;
	u32 ignore_gfp_wait;
	u32 min_order;

#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS

	struct dentry *ignore_gfp_highmem_file;
	struct dentry *ignore_gfp_wait_file;
	struct dentry *min_order_file;

#endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */

} fail_page_alloc = {
	.attr = FAULT_ATTR_INITIALIZER,
	.ignore_gfp_wait = 1,
	.ignore_gfp_highmem = 1,
	.min_order = 1,
};

static int __init setup_fail_page_alloc(char *str)
{
	return setup_fault_attr(&fail_page_alloc.attr, str);
}
__setup("fail_page_alloc=", setup_fail_page_alloc);
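
/*
 * Illustrative note (not part of the original file): with
 * CONFIG_FAIL_PAGE_ALLOC enabled, the fault attributes registered above
 * are typically driven from the kernel command line in the generic
 * fault_attr format, e.g. something like
 *
 *	fail_page_alloc=<interval>,<probability>,<space>,<times>
 *
 * while the debugfs knobs created below (ignore-gfp-wait,
 * ignore-gfp-highmem, min-order) can be tuned at run time.
 */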

static int should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
{
	if (order < fail_page_alloc.min_order)
		return 0;
	if (gfp_mask & __GFP_NOFAIL)
		return 0;
	if (fail_page_alloc.ignore_gfp_highmem && (gfp_mask & __GFP_HIGHMEM))
		return 0;
	if (fail_page_alloc.ignore_gfp_wait && (gfp_mask & __GFP_WAIT))
		return 0;

	return should_fail(&fail_page_alloc.attr, 1 << order);
}

#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS

static int __init fail_page_alloc_debugfs(void)
{
	mode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
	struct dentry *dir;
	int err;

	err = init_fault_attr_dentries(&fail_page_alloc.attr,
				       "fail_page_alloc");
	if (err)
		return err;
	dir = fail_page_alloc.attr.dentries.dir;

	fail_page_alloc.ignore_gfp_wait_file =
		debugfs_create_bool("ignore-gfp-wait", mode, dir,
				    &fail_page_alloc.ignore_gfp_wait);

	fail_page_alloc.ignore_gfp_highmem_file =
		debugfs_create_bool("ignore-gfp-highmem", mode, dir,
				    &fail_page_alloc.ignore_gfp_highmem);
	fail_page_alloc.min_order_file =
		debugfs_create_u32("min-order", mode, dir,
				   &fail_page_alloc.min_order);

	if (!fail_page_alloc.ignore_gfp_wait_file ||
			!fail_page_alloc.ignore_gfp_highmem_file ||
			!fail_page_alloc.min_order_file) {
		err = -ENOMEM;
		debugfs_remove(fail_page_alloc.ignore_gfp_wait_file);
		debugfs_remove(fail_page_alloc.ignore_gfp_highmem_file);
		debugfs_remove(fail_page_alloc.min_order_file);
		cleanup_fault_attr_dentries(&fail_page_alloc.attr);
	}

	return err;
}

late_initcall(fail_page_alloc_debugfs);

#endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */

#else /* CONFIG_FAIL_PAGE_ALLOC */

static inline int should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
{
	return 0;
}

#endif /* CONFIG_FAIL_PAGE_ALLOC */

/*
 * Return 1 if free pages are above 'mark'. This takes into account the order
 * of the allocation.
 */
int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
		      int classzone_idx, int alloc_flags)
{
	/* free_pages may go negative - that's OK */
	long min = mark;
	long free_pages = zone_page_state(z, NR_FREE_PAGES) - (1 << order) + 1;
	int o;

	if (alloc_flags & ALLOC_HIGH)
		min -= min / 2;
	if (alloc_flags & ALLOC_HARDER)
		min -= min / 4;

	if (free_pages <= min + z->lowmem_reserve[classzone_idx])
		return 0;
	for (o = 0; o < order; o++) {
		/* At the next order, this order's pages become unavailable */
		free_pages -= z->free_area[o].nr_free << o;

		/* Require fewer higher order pages to be free */
		min >>= 1;

		if (free_pages <= min)
			return 0;
	}
	return 1;
}
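
/*
 * Worked example (illustrative, not part of the original file), assuming
 * lowmem_reserve[classzone_idx] == 0 and neither ALLOC_HIGH nor
 * ALLOC_HARDER set.  For an order-2 request with mark = 128, 200 free
 * pages in the zone, 100 free order-0 blocks and 30 free order-1 blocks:
 *
 *	start:	free = 200 - 4 + 1 = 197, min = 128  ->  197 > 128, go on
 *	o = 0:	free = 197 - 100*1 =  97, min =  64  ->   97 >  64, go on
 *	o = 1:	free =  97 -  30*2 =  37, min =  32  ->   37 >  32, pass
 *
 * so the check succeeds even though most of the free memory sits in
 * low-order blocks.
 */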

#ifdef CONFIG_NUMA
/*
 * zlc_setup - Setup for "zonelist cache".  Uses cached zone data to
 * skip over zones that are not allowed by the cpuset, or that have
 * been recently (in last second) found to be nearly full.  See further
 * comments in mmzone.h.  Reduces cache footprint of zonelist scans
 * that have to skip over a lot of full or unallowed zones.
 *
 * If the zonelist cache is present in the passed in zonelist, then
 * returns a pointer to the allowed node mask (either the current
 * task's mems_allowed, or node_states[N_HIGH_MEMORY].)
 *
 * If the zonelist cache is not available for this zonelist, does
 * nothing and returns NULL.
 *
 * If the fullzones BITMAP in the zonelist cache is stale (more than
 * a second since last zap'd) then we zap it out (clear its bits.)
 *
 * We hold off even calling zlc_setup, until after we've checked the
 * first zone in the zonelist, on the theory that most allocations will
 * be satisfied from that first zone, so best to examine that zone as
 * quickly as we can.
 */
static nodemask_t *zlc_setup(struct zonelist *zonelist, int alloc_flags)
{
	struct zonelist_cache *zlc;	/* cached zonelist speedup info */
	nodemask_t *allowednodes;	/* zonelist_cache approximation */

	zlc = zonelist->zlcache_ptr;
	if (!zlc)
		return NULL;

	if (time_after(jiffies, zlc->last_full_zap + HZ)) {
		bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST);
		zlc->last_full_zap = jiffies;
	}

	allowednodes = !in_interrupt() && (alloc_flags & ALLOC_CPUSET) ?
					&cpuset_current_mems_allowed :
					&node_states[N_HIGH_MEMORY];
	return allowednodes;
}

/*
 * Given 'z' scanning a zonelist, run a couple of quick checks to see
 * if it is worth looking at further for free memory:
 * 1) Check that the zone isn't thought to be full (doesn't have its
 *    bit set in the zonelist_cache fullzones BITMAP).
 * 2) Check that the zone's node (obtained from the zonelist_cache
 *    z_to_n[] mapping) is allowed in the passed in allowednodes mask.
 * Return true (non-zero) if zone is worth looking at further, or
 * else return false (zero) if it is not.
 *
 * This check -ignores- the distinction between various watermarks,
 * such as GFP_HIGH, GFP_ATOMIC, PF_MEMALLOC, ...  If a zone is
 * found to be full for any variation of these watermarks, it will
 * be considered full for up to one second by all requests, unless
 * we are so low on memory on all allowed nodes that we are forced
 * into the second scan of the zonelist.
 *
 * In the second scan we ignore this zonelist cache and exactly
 * apply the watermarks to all zones, even if it is slower to do so.
 * We are low on memory in the second scan, and should leave no stone
 * unturned looking for a free page.
 */
static int zlc_zone_worth_trying(struct zonelist *zonelist, struct zoneref *z,
						nodemask_t *allowednodes)
{
	struct zonelist_cache *zlc;	/* cached zonelist speedup info */
	int i;				/* index of *z in zonelist zones */
	int n;				/* node that zone *z is on */

	zlc = zonelist->zlcache_ptr;
	if (!zlc)
		return 1;

	i = z - zonelist->_zonerefs;
	n = zlc->z_to_n[i];

	/* This zone is worth trying if it is allowed but not full */
	return node_isset(n, *allowednodes) && !test_bit(i, zlc->fullzones);
}

/*
 * Given 'z' scanning a zonelist, set the corresponding bit in
 * zlc->fullzones, so that subsequent attempts to allocate a page
 * from that zone don't waste time re-examining it.
 */
static void zlc_mark_zone_full(struct zonelist *zonelist, struct zoneref *z)
{
	struct zonelist_cache *zlc;	/* cached zonelist speedup info */
	int i;				/* index of *z in zonelist zones */

	zlc = zonelist->zlcache_ptr;
	if (!zlc)
		return;

	i = z - zonelist->_zonerefs;

	set_bit(i, zlc->fullzones);
}

#else	/* CONFIG_NUMA */

static nodemask_t *zlc_setup(struct zonelist *zonelist, int alloc_flags)
{
	return NULL;
}

static int zlc_zone_worth_trying(struct zonelist *zonelist, struct zoneref *z,
				nodemask_t *allowednodes)
{
	return 1;
}

static void zlc_mark_zone_full(struct zonelist *zonelist, struct zoneref *z)
{
}
#endif	/* CONFIG_NUMA */

/*
 * get_page_from_freelist goes through the zonelist trying to allocate
 * a page.
 */
static struct page *
get_page_from_freelist(gfp_t gfp_mask, nodemask_t *nodemask, unsigned int order,
		struct zonelist *zonelist, int high_zoneidx, int alloc_flags)
{
	struct zoneref *z;
	struct page *page = NULL;
	int classzone_idx;
	struct zone *zone, *preferred_zone;
	nodemask_t *allowednodes = NULL;/* zonelist_cache approximation */
	int zlc_active = 0;		/* set if using zonelist_cache */
	int did_zlc_setup = 0;		/* just call zlc_setup() one time */

	(void)first_zones_zonelist(zonelist, high_zoneidx, nodemask,
							&preferred_zone);
	if (!preferred_zone)
		return NULL;

	classzone_idx = zone_idx(preferred_zone);

zonelist_scan:
	/*
	 * Scan zonelist, looking for a zone with enough free.
	 * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
	 */
	for_each_zone_zonelist_nodemask(zone, z, zonelist,
						high_zoneidx, nodemask) {
		if (NUMA_BUILD && zlc_active &&
			!zlc_zone_worth_trying(zonelist, z, allowednodes))
				continue;
		if ((alloc_flags & ALLOC_CPUSET) &&
			!cpuset_zone_allowed_softwall(zone, gfp_mask))
				goto try_next_zone;

		if (!(alloc_flags & ALLOC_NO_WATERMARKS)) {
			unsigned long mark;
			if (alloc_flags & ALLOC_WMARK_MIN)
				mark = zone->pages_min;
			else if (alloc_flags & ALLOC_WMARK_LOW)
				mark = zone->pages_low;
			else
				mark = zone->pages_high;
			if (!zone_watermark_ok(zone, order, mark,
				    classzone_idx, alloc_flags)) {
				if (!zone_reclaim_mode ||
				    !zone_reclaim(zone, gfp_mask, order))
					goto this_zone_full;
			}
		}

		page = buffered_rmqueue(preferred_zone, zone, order, gfp_mask);
		if (page)
			break;
this_zone_full:
		if (NUMA_BUILD)
			zlc_mark_zone_full(zonelist, z);
try_next_zone:
		if (NUMA_BUILD && !did_zlc_setup) {
			/* we do zlc_setup after the first zone is tried */
			allowednodes = zlc_setup(zonelist, alloc_flags);