discontig.c

来自「Linux Kernel 2.6.9 for OMAP1710」· C语言 代码 · 共 696 行 · 第 1/2 页

C
696
字号
/* * Copyright (c) 2000, 2003 Silicon Graphics, Inc.  All rights reserved. * Copyright (c) 2001 Intel Corp. * Copyright (c) 2001 Tony Luck <tony.luck@intel.com> * Copyright (c) 2002 NEC Corp. * Copyright (c) 2002 Kimio Suganuma <k-suganuma@da.jp.nec.com> *//* * Platform initialization for Discontig Memory */#include <linux/kernel.h>#include <linux/mm.h>#include <linux/swap.h>#include <linux/bootmem.h>#include <linux/acpi.h>#include <linux/efi.h>#include <asm/pgalloc.h>#include <asm/tlb.h>#include <asm/meminit.h>#include <asm/numa.h>#include <asm/sections.h>/* * Track per-node information needed to setup the boot memory allocator, the * per-node areas, and the real VM. */struct early_node_data {	struct ia64_node_data *node_data;	pg_data_t *pgdat;	unsigned long pernode_addr;	unsigned long pernode_size;	struct bootmem_data bootmem_data;	unsigned long num_physpages;	unsigned long num_dma_physpages;	unsigned long min_pfn;	unsigned long max_pfn;};static struct early_node_data mem_data[NR_NODES] __initdata;/** * reassign_cpu_only_nodes - called from find_memory to move CPU-only nodes to a memory node * * This function will move nodes with only CPUs (no memory) * to a node with memory which is at the minimum numa_slit distance. * Any reassigments will result in the compression of the nodes * and renumbering the nid values where appropriate. * The static declarations below are to avoid large stack size which * makes the code not re-entrant. */static void __init reassign_cpu_only_nodes(void){	struct node_memblk_s *p;	int i, j, k, nnode, nid, cpu, cpunid, pxm;	u8 cslit, slit;	static DECLARE_BITMAP(nodes_with_mem, NR_NODES) __initdata;	static u8 numa_slit_fix[MAX_NUMNODES * MAX_NUMNODES] __initdata;	static int node_flip[NR_NODES] __initdata;	static int old_nid_map[NR_CPUS] __initdata;	for (nnode = 0, p = &node_memblk[0]; p < &node_memblk[num_node_memblks]; p++)		if (!test_bit(p->nid, (void *) nodes_with_mem)) {			set_bit(p->nid, (void *) nodes_with_mem);			nnode++;		}	/*	 * All nids with memory.	 */	if (nnode == numnodes)		return;	/*	 * Change nids and attempt to migrate CPU-only nodes	 * to the best numa_slit (closest neighbor) possible.	 * For reassigned CPU nodes a nid can't be arrived at	 * until after this loop because the target nid's new	 * identity might not have been established yet. So	 * new nid values are fabricated above numnodes and	 * mapped back later to their true value.	 */	for (nid = 0, i = 0; i < numnodes; i++)  {		if (test_bit(i, (void *) nodes_with_mem)) {			/*			 * Save original nid value for numa_slit			 * fixup and node_cpuid reassignments.			 */			node_flip[nid] = i;			if (i == nid) {				nid++;				continue;			}			for (p = &node_memblk[0]; p < &node_memblk[num_node_memblks]; p++)				if (p->nid == i)					p->nid = nid;			cpunid = nid;			nid++;		} else			cpunid = numnodes;		for (cpu = 0; cpu < NR_CPUS; cpu++)			if (node_cpuid[cpu].nid == i) {				/*				 * For nodes not being reassigned just				 * fix the cpu's nid and reverse pxm map				 */				if (cpunid < numnodes) {					pxm = nid_to_pxm_map[i];					pxm_to_nid_map[pxm] =					          node_cpuid[cpu].nid = cpunid;					continue;				}				/*				 * For nodes being reassigned, find best node by				 * numa_slit information and then make a temporary				 * nid value based on current nid and numnodes.				 */				for (slit = 0xff, k = numnodes + numnodes, j = 0; j < numnodes; j++)					if (i == j)						continue;					else if (test_bit(j, (void *) nodes_with_mem)) {						cslit = numa_slit[i * numnodes + j];						if (cslit < slit) {							k = numnodes + j;							slit = cslit;						}					}				/* save old nid map so we can update the pxm */				old_nid_map[cpu] = node_cpuid[cpu].nid;				node_cpuid[cpu].nid = k;			}	}	/*	 * Fixup temporary nid values for CPU-only nodes.	 */	for (cpu = 0; cpu < NR_CPUS; cpu++)		if (node_cpuid[cpu].nid == (numnodes + numnodes)) {			pxm = nid_to_pxm_map[old_nid_map[cpu]];			pxm_to_nid_map[pxm] = node_cpuid[cpu].nid = nnode - 1;		} else {			for (i = 0; i < nnode; i++) {				if (node_flip[i] != (node_cpuid[cpu].nid - numnodes))					continue;				pxm = nid_to_pxm_map[old_nid_map[cpu]];				pxm_to_nid_map[pxm] = node_cpuid[cpu].nid = i;				break;			}		}	/*	 * Fix numa_slit by compressing from larger	 * nid array to reduced nid array.	 */	for (i = 0; i < nnode; i++)		for (j = 0; j < nnode; j++)			numa_slit_fix[i * nnode + j] =				numa_slit[node_flip[i] * numnodes + node_flip[j]];	memcpy(numa_slit, numa_slit_fix, sizeof (numa_slit));	for (i = nnode; i < numnodes; i++)		node_set_offline(i);	numnodes = nnode;	return;}/* * To prevent cache aliasing effects, align per-node structures so that they * start at addresses that are strided by node number. */#define NODEDATA_ALIGN(addr, node)						\	((((addr) + 1024*1024-1) & ~(1024*1024-1)) + (node)*PERCPU_PAGE_SIZE)/** * build_node_maps - callback to setup bootmem structs for each node * @start: physical start of range * @len: length of range * @node: node where this range resides * * We allocate a struct bootmem_data for each piece of memory that we wish to * treat as a virtually contiguous block (i.e. each node). Each such block * must start on an %IA64_GRANULE_SIZE boundary, so we round the address down * if necessary.  Any non-existent pages will simply be part of the virtual * memmap.  We also update min_low_pfn and max_low_pfn here as we receive * memory ranges from the caller. */static int __init build_node_maps(unsigned long start, unsigned long len,				  int node){	unsigned long cstart, epfn, end = start + len;	struct bootmem_data *bdp = &mem_data[node].bootmem_data;	epfn = GRANULEROUNDUP(end) >> PAGE_SHIFT;	cstart = GRANULEROUNDDOWN(start);	if (!bdp->node_low_pfn) {		bdp->node_boot_start = cstart;		bdp->node_low_pfn = epfn;	} else {		bdp->node_boot_start = min(cstart, bdp->node_boot_start);		bdp->node_low_pfn = max(epfn, bdp->node_low_pfn);	}	min_low_pfn = min(min_low_pfn, bdp->node_boot_start>>PAGE_SHIFT);	max_low_pfn = max(max_low_pfn, bdp->node_low_pfn);	return 0;}/** * early_nr_cpus_node - return number of cpus on a given node * @node: node to check * * Count the number of cpus on @node.  We can't use nr_cpus_node() yet because * acpi_boot_init() (which builds the node_to_cpu_mask array) hasn't been * called yet. */static int early_nr_cpus_node(int node){	int cpu, n = 0;	for (cpu = 0; cpu < NR_CPUS; cpu++)		if (node == node_cpuid[cpu].nid)			n++;	return n;}/** * find_pernode_space - allocate memory for memory map and per-node structures * @start: physical start of range * @len: length of range * @node: node where this range resides * * This routine reserves space for the per-cpu data struct, the list of * pg_data_ts and the per-node data struct.  Each node will have something like * the following in the first chunk of addr. space large enough to hold it. * *    ________________________ *   |                        | *   |~~~~~~~~~~~~~~~~~~~~~~~~| <-- NODEDATA_ALIGN(start, node) for the first *   |    PERCPU_PAGE_SIZE *  |     start and length big enough *   |        NR_CPUS         | *   |------------------------| *   |   local pg_data_t *    | *   |------------------------| *   |  local ia64_node_data  | *   |------------------------| *   |          ???           | *   |________________________| * * Once this space has been set aside, the bootmem maps are initialized.  We * could probably move the allocation of the per-cpu and ia64_node_data space * outside of this function and use alloc_bootmem_node(), but doing it here * is straightforward and we get the alignments we want so... */static int __init find_pernode_space(unsigned long start, unsigned long len,				     int node){	unsigned long epfn, cpu, cpus;	unsigned long pernodesize = 0, pernode, pages, mapsize;	void *cpu_data;	struct bootmem_data *bdp = &mem_data[node].bootmem_data;	epfn = (start + len) >> PAGE_SHIFT;	pages = bdp->node_low_pfn - (bdp->node_boot_start >> PAGE_SHIFT);	mapsize = bootmem_bootmap_pages(pages) << PAGE_SHIFT;	/*	 * Make sure this memory falls within this node's usable memory	 * since we may have thrown some away in build_maps().	 */	if (start < bdp->node_boot_start || epfn > bdp->node_low_pfn)		return 0;	/* Don't setup this node's local space twice... */	if (mem_data[node].pernode_addr)		return 0;	/*	 * Calculate total size needed, incl. what's necessary	 * for good alignment and alias prevention.	 */	cpus = early_nr_cpus_node(node);	pernodesize += PERCPU_PAGE_SIZE * cpus;	pernodesize += L1_CACHE_ALIGN(sizeof(pg_data_t));	pernodesize += L1_CACHE_ALIGN(sizeof(struct ia64_node_data));	pernodesize = PAGE_ALIGN(pernodesize);	pernode = NODEDATA_ALIGN(start, node);	/* Is this range big enough for what we want to store here? */	if (start + len > (pernode + pernodesize + mapsize)) {		mem_data[node].pernode_addr = pernode;		mem_data[node].pernode_size = pernodesize;		memset(__va(pernode), 0, pernodesize);		cpu_data = (void *)pernode;		pernode += PERCPU_PAGE_SIZE * cpus;		mem_data[node].pgdat = __va(pernode);		pernode += L1_CACHE_ALIGN(sizeof(pg_data_t));		mem_data[node].node_data = __va(pernode);		pernode += L1_CACHE_ALIGN(sizeof(struct ia64_node_data));		mem_data[node].pgdat->bdata = bdp;		pernode += L1_CACHE_ALIGN(sizeof(pg_data_t));		/*		 * Copy the static per-cpu data into the region we		 * just set aside and then setup __per_cpu_offset		 * for each CPU on this node.		 */		for (cpu = 0; cpu < NR_CPUS; cpu++) {			if (node == node_cpuid[cpu].nid) {				memcpy(__va(cpu_data), __phys_per_cpu_start,				       __per_cpu_end - __per_cpu_start);				__per_cpu_offset[cpu] = (char*)__va(cpu_data) -					__per_cpu_start;				cpu_data += PERCPU_PAGE_SIZE;			}		}	}	return 0;}/** * free_node_bootmem - free bootmem allocator memory for use * @start: physical start of range * @len: length of range * @node: node where this range resides * * Simply calls the bootmem allocator to free the specified ranged from * the given pg_data_t's bdata struct.  After this function has been called * for all the entries in the EFI memory map, the bootmem allocator will

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?