page_alloc.c
	if (!zone->spanned_pages)
		return;

	spin_lock_irqsave(&zone->lock, flags);

	max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
	for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
		if (pfn_valid(pfn)) {
			struct page *page = pfn_to_page(pfn);

			if (!swsusp_page_is_forbidden(page))
				swsusp_unset_page_free(page);
		}

	for_each_migratetype_order(order, t) {
		list_for_each(curr, &zone->free_area[order].free_list[t]) {
			unsigned long i;

			pfn = page_to_pfn(list_entry(curr, struct page, lru));
			for (i = 0; i < (1UL << order); i++)
				swsusp_set_page_free(pfn_to_page(pfn + i));
		}
	}
	spin_unlock_irqrestore(&zone->lock, flags);
}
#endif /* CONFIG_PM */

/*
 * Free a 0-order page
 */
static void free_hot_cold_page(struct page *page, int cold)
{
	struct zone *zone = page_zone(page);
	struct per_cpu_pages *pcp;
	unsigned long flags;

	if (PageAnon(page))
		page->mapping = NULL;
	if (free_pages_check(page))
		return;

	if (!PageHighMem(page)) {
		debug_check_no_locks_freed(page_address(page), PAGE_SIZE);
		debug_check_no_obj_freed(page_address(page), PAGE_SIZE);
	}
	arch_free_page(page, 0);
	kernel_map_pages(page, 1, 0);

	pcp = &zone_pcp(zone, get_cpu())->pcp;
	local_irq_save(flags);
	__count_vm_event(PGFREE);
	if (cold)
		list_add_tail(&page->lru, &pcp->list);
	else
		list_add(&page->lru, &pcp->list);
	set_page_private(page, get_pageblock_migratetype(page));
	pcp->count++;
	if (pcp->count >= pcp->high) {
		free_pages_bulk(zone, pcp->batch, &pcp->list, 0);
		pcp->count -= pcp->batch;
	}
	local_irq_restore(flags);
	put_cpu();
}

void free_hot_page(struct page *page)
{
	free_hot_cold_page(page, 0);
}

void free_cold_page(struct page *page)
{
	free_hot_cold_page(page, 1);
}

/*
 * split_page takes a non-compound higher-order page, and splits it into
 * n (1<<order) sub-pages: page[0..n]
 * Each sub-page must be freed individually.
 *
 * Note: this is probably too low level an operation for use in drivers.
 * Please consult with lkml before using this in your driver.
 */
void split_page(struct page *page, unsigned int order)
{
	int i;

	VM_BUG_ON(PageCompound(page));
	VM_BUG_ON(!page_count(page));
	for (i = 1; i < (1 << order); i++)
		set_page_refcounted(page + i);
}
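
/*
 * Illustrative sketch (not part of the original file): a hypothetical
 * caller of split_page().  It allocates a non-compound order-2 block,
 * splits it into four independently refcounted 0-order pages, and frees
 * one of them on its own, as the comment above requires.  The function
 * name is made up for the example.
 */
static struct page *split_page_example(void)
{
	/* four contiguous pages; no __GFP_COMP, so not a compound page */
	struct page *page = alloc_pages(GFP_KERNEL, 2);

	if (!page)
		return NULL;

	split_page(page, 2);	/* page[0..3] now have their own refcounts */
	__free_page(page + 3);	/* sub-pages may be freed individually */

	return page;		/* caller still owns page[0..2] */
}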

/*
 * Really, prep_compound_page() should be called from __rmqueue_bulk().  But
 * we cheat by calling it from here, in the order > 0 path.  Saves a branch
 * or two.
 */
static struct page *buffered_rmqueue(struct zone *preferred_zone,
			struct zone *zone, int order, gfp_t gfp_flags)
{
	unsigned long flags;
	struct page *page;
	int cold = !!(gfp_flags & __GFP_COLD);
	int cpu;
	int migratetype = allocflags_to_migratetype(gfp_flags);

again:
	cpu = get_cpu();
	if (likely(order == 0)) {
		struct per_cpu_pages *pcp;

		pcp = &zone_pcp(zone, cpu)->pcp;
		local_irq_save(flags);
		if (!pcp->count) {
			pcp->count = rmqueue_bulk(zone, 0,
					pcp->batch, &pcp->list, migratetype);
			if (unlikely(!pcp->count))
				goto failed;
		}

		/* Find a page of the appropriate migrate type */
		if (cold) {
			list_for_each_entry_reverse(page, &pcp->list, lru)
				if (page_private(page) == migratetype)
					break;
		} else {
			list_for_each_entry(page, &pcp->list, lru)
				if (page_private(page) == migratetype)
					break;
		}

		/* Allocate more to the pcp list if necessary */
		if (unlikely(&page->lru == &pcp->list)) {
			pcp->count += rmqueue_bulk(zone, 0,
					pcp->batch, &pcp->list, migratetype);
			page = list_entry(pcp->list.next, struct page, lru);
		}

		list_del(&page->lru);
		pcp->count--;
	} else {
		spin_lock_irqsave(&zone->lock, flags);
		page = __rmqueue(zone, order, migratetype);
		spin_unlock(&zone->lock);
		if (!page)
			goto failed;
	}

	__count_zone_vm_events(PGALLOC, zone, 1 << order);
	zone_statistics(preferred_zone, zone);
	local_irq_restore(flags);
	put_cpu();

	VM_BUG_ON(bad_range(zone, page));
	if (prep_new_page(page, order, gfp_flags))
		goto again;
	return page;

failed:
	local_irq_restore(flags);
	put_cpu();
	return NULL;
}

#define ALLOC_NO_WATERMARKS	0x01 /* don't check watermarks at all */
#define ALLOC_WMARK_MIN		0x02 /* use pages_min watermark */
#define ALLOC_WMARK_LOW		0x04 /* use pages_low watermark */
#define ALLOC_WMARK_HIGH	0x08 /* use pages_high watermark */
#define ALLOC_HARDER		0x10 /* try to alloc harder */
#define ALLOC_HIGH		0x20 /* __GFP_HIGH set */
#define ALLOC_CPUSET		0x40 /* check for correct cpuset */

#ifdef CONFIG_FAIL_PAGE_ALLOC

static struct fail_page_alloc_attr {
	struct fault_attr attr;

	u32 ignore_gfp_highmem;
	u32 ignore_gfp_wait;
	u32 min_order;

#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS

	struct dentry *ignore_gfp_highmem_file;
	struct dentry *ignore_gfp_wait_file;
	struct dentry *min_order_file;

#endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */

} fail_page_alloc = {
	.attr = FAULT_ATTR_INITIALIZER,
	.ignore_gfp_wait = 1,
	.ignore_gfp_highmem = 1,
	.min_order = 1,
};

static int __init setup_fail_page_alloc(char *str)
{
	return setup_fault_attr(&fail_page_alloc.attr, str);
}
__setup("fail_page_alloc=", setup_fail_page_alloc);
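
/*
 * Illustrative note (not part of the original file): with
 * CONFIG_FAIL_PAGE_ALLOC enabled, the fault attributes registered above
 * are typically driven from the kernel command line in the generic
 * fault_attr format, e.g. something like
 *
 *	fail_page_alloc=<interval>,<probability>,<space>,<times>
 *
 * while the debugfs knobs created below (ignore-gfp-wait,
 * ignore-gfp-highmem, min-order) can be tuned at run time.
 */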

static int should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
{
	if (order < fail_page_alloc.min_order)
		return 0;
	if (gfp_mask & __GFP_NOFAIL)
		return 0;
	if (fail_page_alloc.ignore_gfp_highmem && (gfp_mask & __GFP_HIGHMEM))
		return 0;
	if (fail_page_alloc.ignore_gfp_wait && (gfp_mask & __GFP_WAIT))
		return 0;

	return should_fail(&fail_page_alloc.attr, 1 << order);
}

#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS

static int __init fail_page_alloc_debugfs(void)
{
	mode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
	struct dentry *dir;
	int err;

	err = init_fault_attr_dentries(&fail_page_alloc.attr,
				       "fail_page_alloc");
	if (err)
		return err;
	dir = fail_page_alloc.attr.dentries.dir;

	fail_page_alloc.ignore_gfp_wait_file =
		debugfs_create_bool("ignore-gfp-wait", mode, dir,
				    &fail_page_alloc.ignore_gfp_wait);

	fail_page_alloc.ignore_gfp_highmem_file =
		debugfs_create_bool("ignore-gfp-highmem", mode, dir,
				    &fail_page_alloc.ignore_gfp_highmem);
	fail_page_alloc.min_order_file =
		debugfs_create_u32("min-order", mode, dir,
				   &fail_page_alloc.min_order);

	if (!fail_page_alloc.ignore_gfp_wait_file ||
			!fail_page_alloc.ignore_gfp_highmem_file ||
			!fail_page_alloc.min_order_file) {
		err = -ENOMEM;
		debugfs_remove(fail_page_alloc.ignore_gfp_wait_file);
		debugfs_remove(fail_page_alloc.ignore_gfp_highmem_file);
		debugfs_remove(fail_page_alloc.min_order_file);
		cleanup_fault_attr_dentries(&fail_page_alloc.attr);
	}

	return err;
}

late_initcall(fail_page_alloc_debugfs);

#endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */

#else /* CONFIG_FAIL_PAGE_ALLOC */

static inline int should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
{
	return 0;
}

#endif /* CONFIG_FAIL_PAGE_ALLOC */

/*
 * Return 1 if free pages are above 'mark'. This takes into account the order
 * of the allocation.
 */
int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
		      int classzone_idx, int alloc_flags)
{
	/* free_pages may go negative - that's OK */
	long min = mark;
	long free_pages = zone_page_state(z, NR_FREE_PAGES) - (1 << order) + 1;
	int o;

	if (alloc_flags & ALLOC_HIGH)
		min -= min / 2;
	if (alloc_flags & ALLOC_HARDER)
		min -= min / 4;

	if (free_pages <= min + z->lowmem_reserve[classzone_idx])
		return 0;
	for (o = 0; o < order; o++) {
		/* At the next order, this order's pages become unavailable */
		free_pages -= z->free_area[o].nr_free << o;

		/* Require fewer higher order pages to be free */
		min >>= 1;

		if (free_pages <= min)
			return 0;
	}
	return 1;
}
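
/*
 * Worked example (illustrative, not part of the original file), assuming
 * lowmem_reserve[classzone_idx] == 0 and neither ALLOC_HIGH nor
 * ALLOC_HARDER set.  For an order-2 request with mark = 128, 200 free
 * pages in the zone, 100 free order-0 blocks and 30 free order-1 blocks:
 *
 *	start:	free = 200 - 4 + 1 = 197, min = 128  ->  197 > 128, go on
 *	o = 0:	free = 197 - 100*1 =  97, min =  64  ->   97 >  64, go on
 *	o = 1:	free =  97 -  30*2 =  37, min =  32  ->   37 >  32, pass
 *
 * so the check succeeds even though most of the free memory sits in
 * low-order blocks.
 */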

#ifdef CONFIG_NUMA
/*
 * zlc_setup - Setup for "zonelist cache".  Uses cached zone data to
 * skip over zones that are not allowed by the cpuset, or that have
 * been recently (in last second) found to be nearly full.  See further
 * comments in mmzone.h.  Reduces cache footprint of zonelist scans
 * that have to skip over a lot of full or unallowed zones.
 *
 * If the zonelist cache is present in the passed in zonelist, then
 * returns a pointer to the allowed node mask (either the current
 * task's mems_allowed, or node_states[N_HIGH_MEMORY].)
 *
 * If the zonelist cache is not available for this zonelist, does
 * nothing and returns NULL.
 *
 * If the fullzones BITMAP in the zonelist cache is stale (more than
 * a second since last zap'd) then we zap it out (clear its bits.)
 *
 * We hold off even calling zlc_setup, until after we've checked the
 * first zone in the zonelist, on the theory that most allocations will
 * be satisfied from that first zone, so best to examine that zone as
 * quickly as we can.
 */
static nodemask_t *zlc_setup(struct zonelist *zonelist, int alloc_flags)
{
	struct zonelist_cache *zlc;	/* cached zonelist speedup info */
	nodemask_t *allowednodes;	/* zonelist_cache approximation */

	zlc = zonelist->zlcache_ptr;
	if (!zlc)
		return NULL;

	if (time_after(jiffies, zlc->last_full_zap + HZ)) {
		bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST);
		zlc->last_full_zap = jiffies;
	}

	allowednodes = !in_interrupt() && (alloc_flags & ALLOC_CPUSET) ?
					&cpuset_current_mems_allowed :
					&node_states[N_HIGH_MEMORY];
	return allowednodes;
}

/*
 * Given 'z' scanning a zonelist, run a couple of quick checks to see
 * if it is worth looking at further for free memory:
 * 1) Check that the zone isn't thought to be full (doesn't have its
 *    bit set in the zonelist_cache fullzones BITMAP).
 * 2) Check that the zone's node (obtained from the zonelist_cache
 *    z_to_n[] mapping) is allowed in the passed in allowednodes mask.
 * Return true (non-zero) if zone is worth looking at further, or
 * else return false (zero) if it is not.
 *
 * This check -ignores- the distinction between various watermarks,
 * such as GFP_HIGH, GFP_ATOMIC, PF_MEMALLOC, ...  If a zone is
 * found to be full for any variation of these watermarks, it will
 * be considered full for up to one second by all requests, unless
 * we are so low on memory on all allowed nodes that we are forced
 * into the second scan of the zonelist.
 *
 * In the second scan we ignore this zonelist cache and exactly
 * apply the watermarks to all zones, even if it is slower to do so.
 * We are low on memory in the second scan, and should leave no stone
 * unturned looking for a free page.
 */
static int zlc_zone_worth_trying(struct zonelist *zonelist, struct zoneref *z,
						nodemask_t *allowednodes)
{
	struct zonelist_cache *zlc;	/* cached zonelist speedup info */
	int i;				/* index of *z in zonelist zones */
	int n;				/* node that zone *z is on */

	zlc = zonelist->zlcache_ptr;
	if (!zlc)
		return 1;

	i = z - zonelist->_zonerefs;
	n = zlc->z_to_n[i];

	/* This zone is worth trying if it is allowed but not full */
	return node_isset(n, *allowednodes) && !test_bit(i, zlc->fullzones);
}

/*
 * Given 'z' scanning a zonelist, set the corresponding bit in
 * zlc->fullzones, so that subsequent attempts to allocate a page
 * from that zone don't waste time re-examining it.
 */
static void zlc_mark_zone_full(struct zonelist *zonelist, struct zoneref *z)
{
	struct zonelist_cache *zlc;	/* cached zonelist speedup info */
	int i;				/* index of *z in zonelist zones */

	zlc = zonelist->zlcache_ptr;
	if (!zlc)
		return;

	i = z - zonelist->_zonerefs;

	set_bit(i, zlc->fullzones);
}

#else	/* CONFIG_NUMA */

static nodemask_t *zlc_setup(struct zonelist *zonelist, int alloc_flags)
{
	return NULL;
}

static int zlc_zone_worth_trying(struct zonelist *zonelist, struct zoneref *z,
				nodemask_t *allowednodes)
{
	return 1;
}

static void zlc_mark_zone_full(struct zonelist *zonelist, struct zoneref *z)
{
}
#endif	/* CONFIG_NUMA */

/*
 * get_page_from_freelist goes through the zonelist trying to allocate
 * a page.
 */
static struct page *
get_page_from_freelist(gfp_t gfp_mask, nodemask_t *nodemask, unsigned int order,
		struct zonelist *zonelist, int high_zoneidx, int alloc_flags)
{
	struct zoneref *z;
	struct page *page = NULL;
	int classzone_idx;
	struct zone *zone, *preferred_zone;
	nodemask_t *allowednodes = NULL;/* zonelist_cache approximation */
	int zlc_active = 0;		/* set if using zonelist_cache */
	int did_zlc_setup = 0;		/* just call zlc_setup() one time */

	(void)first_zones_zonelist(zonelist, high_zoneidx, nodemask,
							&preferred_zone);
	if (!preferred_zone)
		return NULL;

	classzone_idx = zone_idx(preferred_zone);

zonelist_scan:
	/*
	 * Scan zonelist, looking for a zone with enough free.
	 * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
	 */
	for_each_zone_zonelist_nodemask(zone, z, zonelist,
						high_zoneidx, nodemask) {
		if (NUMA_BUILD && zlc_active &&
			!zlc_zone_worth_trying(zonelist, z, allowednodes))
				continue;
		if ((alloc_flags & ALLOC_CPUSET) &&
			!cpuset_zone_allowed_softwall(zone, gfp_mask))
				goto try_next_zone;

		if (!(alloc_flags & ALLOC_NO_WATERMARKS)) {
			unsigned long mark;
			if (alloc_flags & ALLOC_WMARK_MIN)
				mark = zone->pages_min;
			else if (alloc_flags & ALLOC_WMARK_LOW)
				mark = zone->pages_low;
			else
				mark = zone->pages_high;
			if (!zone_watermark_ok(zone, order, mark,
				    classzone_idx, alloc_flags)) {
				if (!zone_reclaim_mode ||
				    !zone_reclaim(zone, gfp_mask, order))
					goto this_zone_full;
			}
		}

		page = buffered_rmqueue(preferred_zone, zone, order, gfp_mask);
		if (page)
			break;
this_zone_full:
		if (NUMA_BUILD)
			zlc_mark_zone_full(zonelist, z);
try_next_zone:
		if (NUMA_BUILD && !did_zlc_setup) {
			/* we do zlc_setup after the first zone is tried */
			allowednodes = zlc_setup(zonelist, alloc_flags);