📄 numa_64.c
/*
 * Generic VM initialization for x86-64 NUMA setups.
 * Copyright 2002,2003 Andi Kleen, SuSE Labs.
 */
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/mmzone.h>
#include <linux/ctype.h>
#include <linux/module.h>
#include <linux/nodemask.h>

#include <asm/e820.h>
#include <asm/proto.h>
#include <asm/dma.h>
#include <asm/numa.h>
#include <asm/acpi.h>

#ifndef Dprintk
#define Dprintk(x...)
#endif

struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
bootmem_data_t plat_node_bdata[MAX_NUMNODES];

struct memnode memnode;

unsigned char cpu_to_node[NR_CPUS] __read_mostly = {
	[0 ... NR_CPUS-1] = NUMA_NO_NODE
};
unsigned char apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
	[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
};
cpumask_t node_to_cpumask[MAX_NUMNODES] __read_mostly;

int numa_off __initdata;
unsigned long __initdata nodemap_addr;
unsigned long __initdata nodemap_size;

/*
 * Given a shift value, try to populate memnodemap[].
 * Returns:
 * 1 if OK
 * 0 if memnodemap[] too small (or shift too small)
 * -1 if node overlap or lost ram (shift too big)
 */
static int __init
populate_memnodemap(const struct bootnode *nodes, int numnodes, int shift)
{
	int i;
	int res = -1;
	unsigned long addr, end;

	memset(memnodemap, 0xff, memnodemapsize);
	for (i = 0; i < numnodes; i++) {
		addr = nodes[i].start;
		end = nodes[i].end;
		if (addr >= end)
			continue;
		if ((end >> shift) >= memnodemapsize)
			return 0;
		do {
			if (memnodemap[addr >> shift] != 0xff)
				return -1;
			memnodemap[addr >> shift] = i;
			addr += (1UL << shift);
		} while (addr < end);
		res = 1;
	}
	return res;
}

static int __init allocate_cachealigned_memnodemap(void)
{
	unsigned long pad, pad_addr;

	memnodemap = memnode.embedded_map;
	if (memnodemapsize <= 48)
		return 0;

	pad = L1_CACHE_BYTES - 1;
	pad_addr = 0x8000;
	nodemap_size = pad + memnodemapsize;
	nodemap_addr = find_e820_area(pad_addr, end_pfn << PAGE_SHIFT,
				      nodemap_size);
	if (nodemap_addr == -1UL) {
		printk(KERN_ERR
		       "NUMA: Unable to allocate Memory to Node hash map\n");
		nodemap_addr = nodemap_size = 0;
		return -1;
	}
	pad_addr = (nodemap_addr + pad) & ~pad;
	memnodemap = phys_to_virt(pad_addr);

	printk(KERN_DEBUG "NUMA: Allocated memnodemap from %lx - %lx\n",
	       nodemap_addr, nodemap_addr + nodemap_size);
	return 0;
}

/*
 * The LSB of all start and end addresses in the node map is the value of the
 * maximum possible shift.
 */
static int __init
extract_lsb_from_nodes(const struct bootnode *nodes, int numnodes)
{
	int i, nodes_used = 0;
	unsigned long start, end;
	unsigned long bitfield = 0, memtop = 0;

	for (i = 0; i < numnodes; i++) {
		start = nodes[i].start;
		end = nodes[i].end;
		if (start >= end)
			continue;
		bitfield |= start;
		nodes_used++;
		if (end > memtop)
			memtop = end;
	}
	if (nodes_used <= 1)
		i = 63;
	else
		i = find_first_bit(&bitfield, sizeof(unsigned long) * 8);
	memnodemapsize = (memtop >> i) + 1;
	return i;
}

int __init compute_hash_shift(struct bootnode *nodes, int numnodes)
{
	int shift;

	shift = extract_lsb_from_nodes(nodes, numnodes);
	if (allocate_cachealigned_memnodemap())
		return -1;
	printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n", shift);

	if (populate_memnodemap(nodes, numnodes, shift) != 1) {
		printk(KERN_INFO
		       "Your memory is not aligned; you need to rebuild your "
		       "kernel with a bigger NODEMAPSIZE shift=%d\n", shift);
		return -1;
	}
	return shift;
}
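
/*
 * Worked example of the hash above: with two nodes covering 0-2GB and
 * 2GB-6GB, the OR of the node start addresses is 0x80000000, whose
 * lowest set bit is bit 31, so extract_lsb_from_nodes() returns
 * shift = 31 and sizes the map at (0x180000000 >> 31) + 1 = 4 entries.
 * populate_memnodemap() then fills memnodemap[] with { 0, 1, 1, 0xff }:
 * each 2GB chunk of physical address space maps to its node id (0xff
 * marks holes), so a phys_to_nid() lookup is one shift plus one byte
 * load.
 */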

#ifdef CONFIG_SPARSEMEM
int early_pfn_to_nid(unsigned long pfn)
{
	return phys_to_nid(pfn << PAGE_SHIFT);
}
#endif

static void * __init
early_node_mem(int nodeid, unsigned long start, unsigned long end,
	       unsigned long size)
{
	unsigned long mem = find_e820_area(start, end, size);
	void *ptr;

	if (mem != -1L)
		return __va(mem);
	ptr = __alloc_bootmem_nopanic(size, SMP_CACHE_BYTES,
				      __pa(MAX_DMA_ADDRESS));
	if (ptr == NULL) {
		printk(KERN_ERR "Cannot find %lu bytes in node %d\n",
		       size, nodeid);
		return NULL;
	}
	return ptr;
}

/* Initialize bootmem allocator for a node */
void __init setup_node_bootmem(int nodeid, unsigned long start,
			       unsigned long end)
{
	unsigned long start_pfn, end_pfn, bootmap_pages, bootmap_size,
		      bootmap_start;
	unsigned long nodedata_phys;
	void *bootmap;
	const int pgdat_size = round_up(sizeof(pg_data_t), PAGE_SIZE);

	start = round_up(start, ZONE_ALIGN);

	printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n",
	       nodeid, start, end);

	start_pfn = start >> PAGE_SHIFT;
	end_pfn = end >> PAGE_SHIFT;

	node_data[nodeid] = early_node_mem(nodeid, start, end, pgdat_size);
	if (node_data[nodeid] == NULL)
		return;
	nodedata_phys = __pa(node_data[nodeid]);

	memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t));
	NODE_DATA(nodeid)->bdata = &plat_node_bdata[nodeid];
	NODE_DATA(nodeid)->node_start_pfn = start_pfn;
	NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn;

	/* Find a place for the bootmem map */
	bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
	bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE);
	bootmap = early_node_mem(nodeid, bootmap_start, end,
				 bootmap_pages << PAGE_SHIFT);
	if (bootmap == NULL) {
		if (nodedata_phys < start || nodedata_phys >= end)
			free_bootmem((unsigned long)node_data[nodeid],
				     pgdat_size);
		node_data[nodeid] = NULL;
		return;
	}
	bootmap_start = __pa(bootmap);
	Dprintk("bootmap start %lu pages %lu\n", bootmap_start, bootmap_pages);

	bootmap_size = init_bootmem_node(NODE_DATA(nodeid),
					 bootmap_start >> PAGE_SHIFT,
					 start_pfn, end_pfn);

	free_bootmem_with_active_regions(nodeid, end);

	reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size);
	reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start,
			     bootmap_pages << PAGE_SHIFT);
#ifdef CONFIG_ACPI_NUMA
	srat_reserve_add_area(nodeid);
#endif
	node_set_online(nodeid);
}
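
/*
 * Note the resulting layout: when early_node_mem() can satisfy both
 * requests from the node's own range, the node begins with its
 * pg_data_t (rounded up to a page) followed by the page-aligned bootmem
 * bitmap (one bit per page of the node), and both are immediately
 * reserved against the node's bootmem allocator so later boot-time
 * allocations cannot overwrite them.
 */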

/* Initialize final allocator for a zone */
void __init setup_node_zones(int nodeid)
{
	unsigned long start_pfn, end_pfn, memmapsize, limit;

	start_pfn = node_start_pfn(nodeid);
	end_pfn = node_end_pfn(nodeid);

	Dprintk(KERN_INFO "Setting up memmap for node %d %lx-%lx\n",
		nodeid, start_pfn, end_pfn);

	/*
	 * Try to allocate mem_map at the end of the node to avoid filling
	 * up precious <4GB memory.
	 */
	memmapsize = sizeof(struct page) * (end_pfn - start_pfn);
	limit = end_pfn << PAGE_SHIFT;
#ifdef CONFIG_FLAT_NODE_MEM_MAP
	NODE_DATA(nodeid)->node_mem_map =
		__alloc_bootmem_core(NODE_DATA(nodeid)->bdata,
				     memmapsize, SMP_CACHE_BYTES,
				     round_down(limit - memmapsize,
						PAGE_SIZE), limit);
#endif
}

void __init numa_init_array(void)
{
	int rr, i;

	/*
	 * There are unfortunately some poorly designed mainboards around
	 * that only connect memory to a single CPU. This breaks the 1:1
	 * cpu->node mapping. To avoid this, fill in the mapping for all
	 * possible CPUs, as the number of CPUs is not known yet. We round
	 * robin the existing nodes.
	 */
	rr = first_node(node_online_map);
	for (i = 0; i < NR_CPUS; i++) {
		if (cpu_to_node(i) != NUMA_NO_NODE)
			continue;
		numa_set_node(i, rr);
		rr = next_node(rr, node_online_map);
		if (rr == MAX_NUMNODES)
			rr = first_node(node_online_map);
	}
}

#ifdef CONFIG_NUMA_EMU
/* NUMA emulation */
char *cmdline __initdata;

/*
 * Sets up nid to range from addr to addr + size. If the end boundary is
 * greater than max_addr, then max_addr is used instead. The return value is 0
 * if there is additional memory left for allocation past addr and -1
 * otherwise. addr is adjusted to be at the end of the node.
 */
static int __init setup_node_range(int nid, struct bootnode *nodes, u64 *addr,
				   u64 size, u64 max_addr)
{
	int ret = 0;

	nodes[nid].start = *addr;
	*addr += size;
	if (*addr >= max_addr) {
		*addr = max_addr;
		ret = -1;
	}
	nodes[nid].end = *addr;
	node_set(nid, node_possible_map);
	printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n", nid,
	       nodes[nid].start, nodes[nid].end,
	       (nodes[nid].end - nodes[nid].start) >> 20);
	return ret;
}

/*
 * Splits num_nodes nodes up equally starting at node_start. The return value
 * is the number of nodes split up and addr is adjusted to be at the end of
 * the last node allocated.
 */
static int __init split_nodes_equally(struct bootnode *nodes, u64 *addr,
				      u64 max_addr, int node_start,
				      int num_nodes)
{
	unsigned int big;
	u64 size;
	int i;

	if (num_nodes <= 0)
		return -1;
	if (num_nodes > MAX_NUMNODES)
		num_nodes = MAX_NUMNODES;
	size = (max_addr - *addr - e820_hole_size(*addr, max_addr)) /
	       num_nodes;
	/*
	 * Calculate the number of big nodes that can be allocated as a result
	 * of consolidating the leftovers.
	 */
	big = ((size & ~FAKE_NODE_MIN_HASH_MASK) * num_nodes) /
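
The memnode hash is easy to exercise outside the kernel (the listing above breaks off mid-way through split_nodes_equally()). Below is a minimal userspace sketch of extract_lsb_from_nodes() and populate_memnodemap() with a hypothetical two-node layout; it assumes an LP64 toolchain and substitutes GCC/Clang's __builtin_ctzl for the kernel's find_first_bit, so treat it as a study aid rather than the kernel's implementation.

/* memnode_demo.c - userspace sketch of the memnodemap hash, for study only */
#include <stdio.h>
#include <string.h>

struct bootnode { unsigned long start, end; };

static unsigned char memnodemap[64];
static unsigned long memnodemapsize;
static int memnode_shift;

/* Mirror of extract_lsb_from_nodes(): the lowest set bit across all
   node start addresses bounds the usable shift. */
static int extract_lsb(const struct bootnode *nodes, int numnodes)
{
	unsigned long bitfield = 0, memtop = 0;
	int i, nodes_used = 0;

	for (i = 0; i < numnodes; i++) {
		if (nodes[i].start >= nodes[i].end)
			continue;
		bitfield |= nodes[i].start;
		nodes_used++;
		if (nodes[i].end > memtop)
			memtop = nodes[i].end;
	}
	i = (nodes_used <= 1) ? 63 : __builtin_ctzl(bitfield);
	memnodemapsize = (memtop >> i) + 1;
	return i;
}

/* Mirror of populate_memnodemap(): one byte of node id per 1<<shift
   bytes of physical address space; 0xff marks holes. */
static int populate(const struct bootnode *nodes, int numnodes, int shift)
{
	int i, res = -1;
	unsigned long addr;

	memset(memnodemap, 0xff, sizeof(memnodemap));
	for (i = 0; i < numnodes; i++) {
		if (nodes[i].start >= nodes[i].end)
			continue;
		if ((nodes[i].end >> shift) >= sizeof(memnodemap))
			return 0;	/* map too small: shift too small */
		for (addr = nodes[i].start; addr < nodes[i].end;
		     addr += 1UL << shift) {
			if (memnodemap[addr >> shift] != 0xff)
				return -1;	/* overlap: shift too big */
			memnodemap[addr >> shift] = i;
		}
		res = 1;
	}
	return res;
}

static int phys_to_nid(unsigned long addr)
{
	return memnodemap[addr >> memnode_shift];
}

int main(void)
{
	/* Hypothetical two-node layout: 0-2GB and 2GB-6GB. */
	struct bootnode nodes[] = {
		{ 0x000000000UL, 0x080000000UL },
		{ 0x080000000UL, 0x180000000UL },
	};

	memnode_shift = extract_lsb(nodes, 2);
	printf("shift %d, map size %lu\n", memnode_shift, memnodemapsize);
	if (populate(nodes, 2, memnode_shift) == 1)
		printf("0x40000000 -> node %d, 0x100000000 -> node %d\n",
		       phys_to_nid(0x40000000UL),
		       phys_to_nid(0x100000000UL));
	return 0;
}

Built with a plain "gcc memnode_demo.c", this prints shift 31 with a 4-entry map and resolves 0x40000000 to node 0 and 0x100000000 to node 1, matching the worked example in the comment after compute_hash_shift().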