📄 vmscan.c

📁 最新最稳定的Linux内存管理模块源代码
💻 C
📖 第 1 页 / 共 5 页
字号:
	 * OK, so we have swap space and a fair amount of page cache	 * pages.  We use the recently rotated / recently scanned	 * ratios to determine how valuable each cache is.	 *	 * Because workloads change over time (and to avoid overflow)	 * we keep these statistics as a floating average, which ends	 * up weighing recent references more than old ones.	 *	 * anon in [0], file in [1]	 */	if (unlikely(reclaim_stat->recent_scanned[0] > anon / 4)) {		spin_lock_irq(&zone->lru_lock);		reclaim_stat->recent_scanned[0] /= 2;		reclaim_stat->recent_rotated[0] /= 2;		spin_unlock_irq(&zone->lru_lock);	}	if (unlikely(reclaim_stat->recent_scanned[1] > file / 4)) {		spin_lock_irq(&zone->lru_lock);		reclaim_stat->recent_scanned[1] /= 2;		reclaim_stat->recent_rotated[1] /= 2;		spin_unlock_irq(&zone->lru_lock);	}	/*	 * With swappiness at 100, anonymous and file have the same priority.	 * This scanning priority is essentially the inverse of IO cost.	 */	anon_prio = sc->swappiness;	file_prio = 200 - sc->swappiness;	/*	 * The amount of pressure on anon vs file pages is inversely	 * proportional to the fraction of recently scanned pages on	 * each list that were recently referenced and in active use.	 */	ap = (anon_prio + 1) * (reclaim_stat->recent_scanned[0] + 1);	ap /= reclaim_stat->recent_rotated[0] + 1;	fp = (file_prio + 1) * (reclaim_stat->recent_scanned[1] + 1);	fp /= reclaim_stat->recent_rotated[1] + 1;	/* Normalize to percentages */	percent[0] = 100 * ap / (ap + fp + 1);	percent[1] = 100 - percent[0];}/* * This is a basic per-zone page freer.  Used by both kswapd and direct reclaim. 
*/
/*
 * shrink_zone - scan the evictable LRU lists of one zone.
 * @priority: scan priority; when non-zero each list's page count is shifted
 *            right by it and then scaled by the anon/file percentage from
 *            get_scan_ratio().  At priority 0 the unscaled count is used.
 * @zone:     zone whose LRU lists are scanned.
 * @sc:       scan control; sc->nr_reclaimed is read on entry and updated
 *            with the running total before returning.
 *
 * When scanning the global LRU, small scan amounts are accumulated in
 * zone->lru[l].nr_scan and only acted upon once they reach
 * sc->swap_cluster_max; otherwise the computed amount is used directly.
 */
static void shrink_zone(int priority, struct zone *zone,
				struct scan_control *sc)
{
	unsigned long nr[NR_LRU_LISTS];
	unsigned long nr_to_scan;
	unsigned long percent[2];	/* anon @ 0; file @ 1 */
	enum lru_list l;
	unsigned long nr_reclaimed = sc->nr_reclaimed;
	unsigned long swap_cluster_max = sc->swap_cluster_max;

	get_scan_ratio(zone, sc, percent);

	for_each_evictable_lru(l) {
		int file = is_file_lru(l);
		/*
		 * The count ends up in the unsigned long nr[] / nr_scan
		 * counters.  Holding it in an int would truncate large list
		 * sizes and could leave a negative value to be shifted and
		 * multiplied below, so keep it unsigned long throughout.
		 * NOTE(review): assumes zone_nr_pages() returns an unsigned
		 * page count - confirm against its definition.
		 */
		unsigned long scan;

		scan = zone_nr_pages(zone, sc, l);
		if (priority) {
			scan >>= priority;
			scan = (scan * percent[file]) / 100;
		}
		if (scanning_global_lru(sc)) {
			/* Batch up small requests until a full cluster is due. */
			zone->lru[l].nr_scan += scan;
			nr[l] = zone->lru[l].nr_scan;
			if (nr[l] >= swap_cluster_max)
				zone->lru[l].nr_scan = 0;
			else
				nr[l] = 0;
		} else
			nr[l] = scan;
	}

	/* LRU_ACTIVE_ANON is deliberately not part of the loop condition. */
	while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
					nr[LRU_INACTIVE_FILE]) {
		for_each_evictable_lru(l) {
			if (nr[l]) {
				/* Work through each list one cluster at a time. */
				nr_to_scan = min(nr[l], swap_cluster_max);
				nr[l] -= nr_to_scan;

				nr_reclaimed += shrink_list(l, nr_to_scan,
							    zone, sc, priority);
			}
		}
		/*
		 * On large memory systems, scan >> priority can become
		 * really large. This is fine for the starting priority;
		 * we want to put equal scanning pressure on each zone.
		 * However, if the VM has a harder time of freeing pages,
		 * with multiple processes reclaiming pages, the total
		 * freeing target can get unreasonably large.
		 */
		if (nr_reclaimed > swap_cluster_max &&
			priority < DEF_PRIORITY && !current_is_kswapd())
			break;
	}

	sc->nr_reclaimed = nr_reclaimed;

	/*
	 * Even if we did not try to evict anon pages at all, we want to
	 * rebalance the anon lru active/inactive ratio.
	 */
	if (inactive_anon_is_low(zone, sc))
		shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0);

	throttle_vm_writeout(sc->gfp_mask);
}

/*
 * This is the direct reclaim path, for page-allocating processes.  We only
 * try to reclaim pages from zones which will satisfy the caller's allocation
 * request.
 *
 * We reclaim from a zone even if that zone is over pages_high.  
Because: * a) The caller may be trying to free *extra* pages to satisfy a higher-order *    allocation or * b) The zones may be over pages_high but they must go *over* pages_high to *    satisfy the `incremental min' zone defense algorithm. * * If a zone is deemed to be full of pinned pages then just give it a light * scan then give up on it. */static void shrink_zones(int priority, struct zonelist *zonelist,					struct scan_control *sc){	enum zone_type high_zoneidx = gfp_zone(sc->gfp_mask);	struct zoneref *z;	struct zone *zone;	sc->all_unreclaimable = 1;	for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {		if (!populated_zone(zone))			continue;		/*		 * Take care memory controller reclaiming has small influence		 * to global LRU.		 */		if (scanning_global_lru(sc)) {			if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))				continue;			note_zone_scanning_priority(zone, priority);			if (zone_is_all_unreclaimable(zone) &&						priority != DEF_PRIORITY)				continue;	/* Let kswapd poll it */			sc->all_unreclaimable = 0;		} else {			/*			 * Ignore cpuset limitation here. We just want to reduce			 * # of used pages by us regardless of memory shortage.			 */			sc->all_unreclaimable = 0;			mem_cgroup_note_reclaim_priority(sc->mem_cgroup,							priority);		}		shrink_zone(priority, zone, sc);	}}/* * This is the main entry point to direct page reclaim. * * If a full scan of the inactive list fails to free enough memory then we * are "out of memory" and something needs to be killed. * * If the caller is !__GFP_FS then the probability of a failure is reasonably * high - the zone may be full of dirty or under-writeback pages, which this * caller can't do much about.  We kick pdflush and take explicit naps in the * hope that some of these pages can be written.  But if the allocating task * holds filesystem locks which prevent writeout this might not work, and the * allocation attempt will fail. 
* * returns:	0, if no pages reclaimed * 		else, the number of pages reclaimed */static unsigned long do_try_to_free_pages(struct zonelist *zonelist,					struct scan_control *sc){	int priority;	unsigned long ret = 0;	unsigned long total_scanned = 0;	struct reclaim_state *reclaim_state = current->reclaim_state;	unsigned long lru_pages = 0;	struct zoneref *z;	struct zone *zone;	enum zone_type high_zoneidx = gfp_zone(sc->gfp_mask);	delayacct_freepages_start();	if (scanning_global_lru(sc))		count_vm_event(ALLOCSTALL);	/*	 * mem_cgroup will not do shrink_slab.	 */	if (scanning_global_lru(sc)) {		for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {			if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))				continue;			lru_pages += zone_lru_pages(zone);		}	}	for (priority = DEF_PRIORITY; priority >= 0; priority--) {		sc->nr_scanned = 0;		if (!priority)			disable_swap_token();		shrink_zones(priority, zonelist, sc);		/*		 * Don't shrink slabs when reclaiming memory from		 * over limit cgroups		 */		if (scanning_global_lru(sc)) {			shrink_slab(sc->nr_scanned, sc->gfp_mask, lru_pages);			if (reclaim_state) {				sc->nr_reclaimed += reclaim_state->reclaimed_slab;				reclaim_state->reclaimed_slab = 0;			}		}		total_scanned += sc->nr_scanned;		if (sc->nr_reclaimed >= sc->swap_cluster_max) {			ret = sc->nr_reclaimed;			goto out;		}		/*		 * Try to write back as many pages as we just scanned.  This		 * tends to cause slow streaming writers to write data to the		 * disk smoothly, at the dirtying rate, which is nice.   But		 * that's undesirable in laptop mode, where we *want* lumpy		 * writeout.  So in laptop mode, write out the whole world.		 */		if (total_scanned > sc->swap_cluster_max +					sc->swap_cluster_max / 2) {			wakeup_pdflush(laptop_mode ? 
0 : total_scanned);			sc->may_writepage = 1;		}		/* Take a nap, wait for some writeback to complete */		if (sc->nr_scanned && priority < DEF_PRIORITY - 2)			congestion_wait(WRITE, HZ/10);	}	/* top priority shrink_zones still had more to do? don't OOM, then */	if (!sc->all_unreclaimable && scanning_global_lru(sc))		ret = sc->nr_reclaimed;out:	/*	 * Now that we've scanned all the zones at this priority level, note	 * that level within the zone so that the next thread which performs	 * scanning of this zone will immediately start out at this priority	 * level.  This affects only the decision whether or not to bring	 * mapped pages onto the inactive list.	 */	if (priority < 0)		priority = 0;	if (scanning_global_lru(sc)) {		for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {			if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))				continue;			zone->prev_priority = priority;		}	} else		mem_cgroup_record_reclaim_priority(sc->mem_cgroup, priority);	delayacct_freepages_end();	return ret;}unsigned long try_to_free_pages(struct zonelist *zonelist, int order,								gfp_t gfp_mask){	struct scan_control sc = {		.gfp_mask = gfp_mask,		.may_writepage = !laptop_mode,		.swap_cluster_max = SWAP_CLUSTER_MAX,		.may_swap = 1,		.swappiness = vm_swappiness,		.order = order,		.mem_cgroup = NULL,		.isolate_pages = isolate_pages_global,	};	return do_try_to_free_pages(zonelist, &sc);}#ifdef CONFIG_CGROUP_MEM_RES_CTLRunsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,					   gfp_t gfp_mask,					   bool noswap,					   unsigned int swappiness){	struct scan_control sc = {		.may_writepage = !laptop_mode,		.may_swap = 1,		.swap_cluster_max = SWAP_CLUSTER_MAX,		.swappiness = swappiness,		.order = 0,		.mem_cgroup = mem_cont,		.isolate_pages = mem_cgroup_isolate_pages,	};	struct zonelist *zonelist;	if (noswap)		sc.may_swap = 0;	sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |			(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);	zonelist = NODE_DATA(numa_node_id())->node_zonelists;	
return do_try_to_free_pages(zonelist, &sc);}#endif/* * For kswapd, balance_pgdat() will work across all this node's zones until * they are all at pages_high. * * Returns the number of pages which were actually freed. * * There is special handling here for zones which are full of pinned pages. * This can happen if the pages are all mlocked, or if they are all used by * device drivers (say, ZONE_DMA).  Or if they are all in use by hugetlb. * What we do is to detect the case where all pages in the zone have been * scanned twice and there has been zero successful reclaim.  Mark the zone as * dead and from now on, only perform a short scan.  Basically we're polling * the zone for when the problem goes away. * * kswapd scans the zones in the highmem->normal->dma direction.  It skips * zones which have free_pages > pages_high, but once a zone is found to have * free_pages <= pages_high, we scan that zone and the lower zones regardless * of the number of free pages in the lower zones.  This interoperates with * the page allocator fallback scheme to ensure that aging of pages is balanced * across the zones. */static unsigned long balance_pgdat(pg_data_t *pgdat, int order){	int all_zones_ok;	int priority;	int i;	unsigned long total_scanned;	struct reclaim_state *reclaim_state = current->reclaim_state;	struct scan_control sc = {		.gfp_mask = GFP_KERNEL,		.may_swap = 1,		.swap_cluster_max = SWAP_CLUSTER_MAX,		.swappiness = vm_swappiness,		.order = order,		.mem_cgroup = NULL,		.isolate_pages = isolate_pages_global,	};	/*	 * temp_priority is used to remember the scanning priority at which	 * this zone was successfully refilled to free_pages == pages_high.	 */	int temp_priority[MAX_NR_ZONES];loop_again:	total_scanned = 0;	sc.nr_reclaimed = 0;	sc.may_writepage = !laptop_mode;	count_vm_event(PAGEOUTRUN);	for (i = 0; i < pgdat->nr_zones; i++)		temp_priority[i] = DEF_PRIORITY;	for (priority = DEF_PRIORITY; priority >= 0; priority--) {		int end_zone = 0;	/* Inclusive.  
0 = ZONE_DMA */		unsigned long lru_pages = 0;		/* The swap token gets in the way of swapout... */		if (!priority)			disable_swap_token();		all_zones_ok = 1;		/*		 * Scan in the highmem->dma direction for the highest		 * zone which needs scanning		 */		for (i = pgdat->nr_zones - 1; i >= 0; i--) {			struct zone *zone = pgdat->node_zones + i;			if (!populated_zone(zone))				continue;			if (zone_is_all_unreclaimable(zone) &&			    priority != DEF_PRIORITY)				continue;			/*			 * Do some background aging of the anon list, to give			 * pages a chance to be referenced before reclaiming.			 */			if (inactive_anon_is_low(zone, &sc))				shrink_active_list(SWAP_CLUSTER_MAX, zone,							&sc, priority, 0);			if (!zone_watermark_ok(zone, order, zone->pages_high,					       0, 0)) {				end_zone = i;				break;			}		}		if (i < 0)			goto out;		for (i = 0; i <= end_zone; i++) {			struct zone *zone = pgdat->node_zones + i;			lru_pages += zone_lru_pages(zone);		}		/*		 * Now scan the zone in the dma->highmem direction, stopping		 * at the last zone which needs scanning.		 *		 * We do this because the page allocator works in the opposite		 * direction.  This prevents the page allocator from allocating		 * pages behind kswapd's direction of progress, which would		 * cause too much scanning of the lower zones.		 */		for (i = 0; i <= end_zone; i++) {			struct zone *zone = pgdat->node_zones + i;			int nr_slab;			if (!populated_zone(zone))				continue;			if (zone_is_all_unreclaimable(zone) &&					priority != DEF_PRIORITY)				continue;			if (!zone_watermark_ok(zone, order, zone->pages_high,					       end_zone, 0))				all_zones_ok = 0;			temp_priority[i] = priority;			sc.nr_scanned = 0;			note_zone_scanning_priority(zone, priority);			/*			 * We put equal pressure on every zone, unless one			 * zone has way too many pages free already.			 
*/			if (!zone_watermark_ok(zone, order, 8*zone->pages_high,						end_zone, 0))				shrink_zone(priority, zone, &sc);			reclaim_state->reclaimed_slab = 0;			nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL,						lru_pages);			sc.nr_reclaimed += reclaim_state->reclaimed_slab;			total_scanned += sc.nr_scanned;			if (zone_is_all_unreclaimable(zone))
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -