⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 005_mm_bootmem_c.html

📁 重读linux 2.4.2o所写的笔记
💻 HTML
📖 第 1 页 / 共 2 页
字号:
      flow: static(header);    }    /* used to insert page numbers */    div.google_header::before, div.google_footer::before {      position: absolute;      top: 0;    }    div.google_footer {      flow: static(footer);    }    /* always consider this element at the start of the doc */    div#google_footer {      flow: static(footer, start);    }    span.google_pagenumber {      content: counter(page);    }    span.google_pagecount {      content: counter(pages);    }  }  @page {    @top {      content: flow(header);    }    @bottom {      content: flow(footer);    }  }  /* end default print css */ /* custom css *//* end custom css */  /* ui edited css */    body {    font-family: Verdana;        font-size: 10.0pt;    line-height: normal;    background-color: #ffffff;  }    .documentBG {    background-color: #ffffff;  }  /* end ui edited css */</style>   </head>  <body  revision="dcbsxfpf_46p2hgnhk:4">      <table align=center cellpadding=0 cellspacing=0 height=5716 width=768>
  <tbody>
  <tr>
    <td height=5716 valign=top width=100%>
      <pre>2005-10-24<br>mm/bootmem.c    <br>   <br>   上次对启动过程有个简单总结,下面再次关注一下其中的bootmem部分。<br>setup.S-&gt;asmlinkage void __init start_kernel(void) (init/main.c)<br>  |<br>  +--&gt;setup_arch ---&gt;  处理e820内存报告<br>                --&gt;   关于内存的提示信息<br>                ---&gt;  初始化bootmem (init_bootmem)<br>                ---&gt;  paging_init--+<br>                         +-------+<br>  |                      +--&gt; pagetable_init(含fix map,vmalloc init)<br> \ /                     +--&gt; load cr3<br>  .                      +--&gt; kmap_init                        <br>  .                      +--&gt; free_area_init(zone-buddy初始化)<br>  .             ---&gt;smp,apic,roms等处理<br>  +--&gt; idt gate modules,kmem_cache_init<br>  |<br>  +--&gt; mem_init --&gt;free_all_bootmem buddy 得到页面控制权<br>  +<br>  +--&gt; proc_root_init,fork_init, ipc,inode<br>  +--&gt; smp_init<br>  +<br>  +--&gt; 创建kernel thread, init (init/main.c-&gt;函数init)<br>                +---&gt;do_basic_setup<br>                       ----&gt;init pci,mtrr,sysctl,mca....<br>                       ----&gt;filesystem_setup<br>                       ----&gt;mount_root (关注...)<br>                       ----&gt;......<br>                +---&gt;  free_initmem<br>                +---&gt;  打开console<br>                +---&gt;execve("/sbin/init",argv_init,envp_init);<br>	        +---&gt;execve("/etc/init",argv_init,envp_init);<br>	        +---&gt;execve("/bin/init",argv_init,envp_init);<br>	        +---&gt;execve("/bin/sh",argv_init,envp_init);<br><br>    首先是setup_arch(arch/i386/kernel/setup.c), <br>void __init setup_arch(char **cmdline_p)<br>{<br>  .....<br>  	/*<br>	 * partially used pages are not usable - thus<br>	 * we are rounding upwards:<br>	 */<br>	start_pfn = PFN_UP(__pa(&amp;_end));  /*<br>	                                   * boot mem 只能使用_end之后<br>	                                   * 的内存<br>	                                   */<br>        //接着从e820报告中找最高地址的ram页面<br>        //其pfn赋值给max_pfn,代码略<br>    
    <br>        //然后的代码寻找 max_low_pfn, highstart_pfn,highend_pfn<br>        //逻辑简单,不再罗列<br>        <br>	/*<br>	 * Initialize the boot-time allocator (with low memory only):<br>	 */<br>	/*<br>	 * 初始化boot mem,只使用 low memory<br>	 */<br>	bootmap_size = init_bootmem(start_pfn, max_low_pfn);<br>	<br>	/*<br>	 * Register fully available low RAM pages with the bootmem allocator.<br>	 */<br>	for (i = 0; i &lt; e820.nr_map; i++) { //从e820中寻找ram<br>		unsigned long curr_pfn, last_pfn, size;<br> 		/*<br>		 * Reserve usable low memory<br>		 */<br>		if (e820.map[i].type != E820_RAM)<br>			continue;<br>		/*<br>		 * We are rounding up the start address of usable memory:<br>		 */<br>		curr_pfn = PFN_UP(e820.map[i].addr);<br>		if (curr_pfn &gt;= max_low_pfn)<br>			continue;<br>		/*<br>		 * ... and at the end of the usable range downwards:<br>		 */<br>		last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);<br><br>		if (last_pfn &gt; max_low_pfn)<br>			last_pfn = max_low_pfn;<br><br>		/*<br>		 * .. finally, did all the rounding and playing<br>		 * around just make the area go away?<br>		 */<br>		if (last_pfn &lt;= curr_pfn)<br>			continue;<br><br>		size = last_pfn - curr_pfn;<br>		/*<br>		 * 注册ram到bootmem(标记页面为free)<br>		 */<br>		free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size));<br>	}<br><br>        /*<br>	 * Reserve the bootmem bitmap itself as well. 
We do this in two<br>	 * steps (first step was init_bootmem()) because this catches<br>	 * the (very unlikely) case of us accidentally initializing the<br>	 * bootmem allocator with an invalid RAM area.<br>	 */<br>	 /* 保留从物理地址1M开始(内核加载地址),<br>	  * 大小是内核image+bootmem bitmap 的物理内存<br>	  */<br>	reserve_bootmem(HIGH_MEMORY, (PFN_PHYS(start_pfn) +<br>			 bootmap_size + PAGE_SIZE-1) - (HIGH_MEMORY));<br><br>	/*<br>	 * reserve physical page 0 - it's a special BIOS page on many boxes,<br>	 * enabling clean reboots, SMP operation, laptop functions.<br>	 */<br>	 reserve_bootmem(0, PAGE_SIZE);  /*保留page pfn 0*/<br>	 <br>	 ..........<br>    <br><br>}<br><br>    然后在mem_init调用free_all_bootmem前就可以使用bootmem分配内存了.<br>当mem_init调用此函数后,buddy系统可以工作了.bootmem的使命即告终结.<br><br>    bootmem的管理结构如下:<br>    typedef struct bootmem_data {<br>	unsigned long node_boot_start; /*所能看到的page的起始地址*/<br>	unsigned long node_low_pfn;  /*此node bootmem可管理的最大pfn*/<br>	void *node_bootmem_map;   /*bootmem bit位图,一般是可以使用的起始地址(除去内核image)*/<br>	unsigned long last_offset; /*上次分配的内存的结束地址在last_pos内的偏移*/<br>	unsigned long last_pos; /*上次分配的内存所使用的最后一个页面的pfn*/<br>} bootmem_data_t;<br>    bootmem_data_t属于node, pg_data_t.<br>    在NUMA系统中,每个节点属于一个node,pgdat.<br><br>typedef struct pglist_data {<br>	zone_t node_zones[MAX_NR_ZONES];<br>	zonelist_t node_zonelists[NR_GFPINDEX];<br>	struct page *node_mem_map;  /*NUMA系统中的page结构数组,不再是<br>	                             *全局变量mem_map了<br>	                             */<br>	unsigned long *valid_addr_bitmap;<br>	struct bootmem_data *bdata;<br>	unsigned long node_start_paddr;<br>	unsigned long node_start_mapnr;<br>	unsigned long node_size;<br>	int node_id;<br>	struct pglist_data *node_next;<br>} pg_data_t;<br>     <br>    NUMA系统中的page结构数组,不再是全局变量mem_map了,而是每个zone的<br>pgdat.node_mem_map.<br>    linux2.4.0中对NUMA的处理比较晦涩,不如直接看2.6的代码. 
在2.6中看宏<br>#define virt_to_page(kaddr)	pfn_to_page(__pa(kaddr) &gt;&gt; PAGE_SHIFT)<br>     <br>    在NUMA系统pfn_to_page定义如下:<br> #define pfn_to_page(pfn)					\<br>({								\<br>	unsigned long __pfn = pfn;				\<br>	int __node  = pfn_to_nid(__pfn);			\<br>	&amp;node_mem_map(__node)[node_localnr(__pfn,__node)];	\<br>})<br>    <br>#define node_mem_map(nid)	(NODE_DATA(nid)-&gt;node_mem_map)<br><br>    而pfn_to_nid在mmzone.h定义为<br>/*<br> * generic node memory support, the following assumptions apply:<br> *<br> * 1) memory comes in 256Mb contigious chunks which are either present or not<br> * 2) we will not have more than 64Gb in total<br> *<br> * for now assume that 64Gb is max amount of RAM for whole system<br> *    64Gb / 4096bytes/page = 16777216 pages<br> */<br>#define MAX_NR_PAGES 16777216<br>#define MAX_ELEMENTS 256<br>#define PAGES_PER_ELEMENT (MAX_NR_PAGES/MAX_ELEMENTS)<br><br>extern s8 physnode_map[];<br><br>static inline int pfn_to_nid(unsigned long pfn)<br>{<br>#ifdef CONFIG_NUMA<br>	return((int) physnode_map[(pfn) / PAGES_PER_ELEMENT]);<br>#else<br>	return 0;<br>#endif<br>}<br><br>     从其中注释,容易看懂pgdat之作用,以及zone的划分方式.<br>  <br>     <br>     bootmem.c涉及到的函数应该不难,有几个相关的注释,只是罗列于此,以<br>保分析完整.<br><br><br><br><br>/* return the number of _pages_ that will be allocated for the boot bitmap */<br>unsigned long __init bootmem_bootmap_pages (unsigned long pages)<br>{<br>	unsigned long mapsize;<br><br>	mapsize = (pages+7)/8; //每页用一个bit管理,转换为字节,宁多勿少<br>	mapsize = (mapsize + ~PAGE_MASK) &amp; PAGE_MASK; //字节数按4k对齐,宁多勿少 <br>	mapsize &gt;&gt;= PAGE_SHIFT; //换成页面个数<br><br>	return mapsize;  //pages 个页面需要mapsize个页面来管理<br>}<br><br>/*<br> * Called once to set up the allocator itself.<br> * mapstart: bootmem所能够"管理"页面的起始pfn (i386 就是 内核_end以上) <br> * start: 此node的起始pfn,登记到位图,但只能从mapstart开始使用<br> * end  : bootmem所管理的最后一个页面的pfn<br> */<br>static unsigned long __init init_bootmem_core (pg_data_t *pgdat,<br>	unsigned long mapstart, unsigned long start, unsigned long end)<br>{<br>	
bootmem_data_t *bdata = pgdat-&gt;bdata;<br>	unsigned long mapsize = ((end - start)+7)/8;<br>	           //每页用一个bit管理,转换为字节,宁多勿少<br><br>	pgdat-&gt;node_next = pgdat_list; //pglist_data,NUMA , 每node一个pgdat<br>	pgdat_list = pgdat;<br><br>	mapsize = (mapsize + (sizeof(long) - 1UL)) &amp; ~(sizeof(long) - 1UL);<br><br>	bdata-&gt;node_bootmem_map = phys_to_virt(mapstart &lt;&lt; PAGE_SHIFT);<br>	    //只设置了管理位图的起始地址,需要明确调用reserve_bootmem保留内核<br>	    //和boot mem 自己使用的内存<br><br>	bdata-&gt;node_boot_start = (start &lt;&lt; PAGE_SHIFT);<br>	bdata-&gt;node_low_pfn = end;<br><br>	/*<br>	 * Initially all pages are reserved - setup_arch() has to<br>	 * register free RAM areas explicitly.<br>	 * 开始都处于已使用状态,需要setup_arch 注册ram页面<br>	 */<br>	memset(bdata-&gt;node_bootmem_map, 0xff, mapsize);<br><br>	return mapsize;<br>}<br><br><br>/*<br> * We 'merge' subsequent allocations to save space. We might 'lose'<br> * some fraction of a page if allocations cannot be satisfied due to<br> * size constraints on boxes where there is physical RAM space<br> * fragmentation - in these cases * (mostly large memory boxes) this<br> * is not a problem.<br> *<br> * On low memory boxes we get it right in 100% of the cases.<br> */<br><br>/*<br> * alignment has to be a power of 2 value.<br> */<br> /*<br>  *  align : 按align对齐<br>  *  goal:   请求目标地址(以上的)内存<br>  */<br>static void * __init __alloc_bootmem_core (bootmem_data_t *bdata, <br>	unsigned long size, unsigned long align, unsigned long goal)<br>{<br>	unsigned long i, start = 0; //start is pfn<br>	void *ret;<br>	unsigned long offset, remaining_size;<br>	unsigned long areasize, preferred, incr;//areasize is pfn number<br>	unsigned long eidx = bdata-&gt;node_low_pfn - (bdata-&gt;node_boot_start &gt;&gt;<br>							PAGE_SHIFT);<br><br>	if (!size) BUG();<br><br>	/*<br>	 * We try to allocate bootmem pages above 'goal'<br>	 * first, then we try to allocate lower pages.<br>	 */<br>	if (goal &amp;&amp; (goal &gt;= bdata-&gt;node_boot_start) &amp;&amp; <br>			
((goal &gt;&gt; PAGE_SHIFT) &lt; bdata-&gt;node_low_pfn)) {<br>		preferred = goal - bdata-&gt;node_boot_start;<br>	} else<br>		preferred = 0;<br><br>    /* 按要求对齐 */<br>	preferred = ((preferred + align - 1) &amp; ~(align - 1)) &gt;&gt; PAGE_SHIFT; <br>	areasize = (size+PAGE_SIZE-1)/PAGE_SIZE; /*size 按page对齐,up round*/<br>	incr = align &gt;&gt; PAGE_SHIFT ? : 1; /*按对齐要求计算每次步进几个pfn*/<br><br>restart_scan:<br>	for (i = preferred; i &lt; eidx; i += incr) {<br>		unsigned long j;<br>		if (test_bit(i, bdata-&gt;node_bootmem_map))<br>			continue;<br>		for (j = i + 1; j &lt; i + areasize; ++j) {<br>			if (j &gt;= eidx)<br>				goto fail_block;<br>			if (test_bit (j, bdata-&gt;node_bootmem_map))<br>				goto fail_block;<br>		}<br>		start = i;<br>		goto found; /*找到了所要求的几个连续的page*/<br>	fail_block:;<br>	}<br>	if (preferred) {<br>		preferred = 0; /*<br>不能满足goal 要求,尝试从node_boot_start开始寻找*/<br>		goto restart_scan;<br>	}<br>found:<br>	if (start &gt;= eidx)<br>		BUG();<br><br>	/*<br>	 * Is the next page of the previous allocation-end the start<br>	 * of this allocation's buffer? 
If yes then we can 'merge'<br>	 * the previous partial page with this allocation.<br>	 */<br>	if (align &lt;= PAGE_SIZE<br>	    &amp;&amp; bdata-&gt;last_offset &amp;&amp; bdata-&gt;last_pos+1 == start) {<br>		offset = (bdata-&gt;last_offset+align-1) &amp; ~(align-1);<br>		if (offset &gt; PAGE_SIZE)<br>			BUG();<br>		remaining_size = PAGE_SIZE-offset;<br>		if (size &lt; remaining_size) {<br>			areasize = 0;<br>			// last_pos unchanged<br>			bdata-&gt;last_offset = offset+size;<br>			ret = phys_to_virt(bdata-&gt;last_pos*PAGE_SIZE + offset +<br>						bdata-&gt;node_boot_start);<br>		} else {<br>			remaining_size = size - remaining_size;<br>			areasize = (remaining_size+PAGE_SIZE-1)/PAGE_SIZE;<br>			ret = phys_to_virt(bdata-&gt;last_pos*PAGE_SIZE + offset +<br>						bdata-&gt;node_boot_start);<br>			bdata-&gt;last_pos = start+areasize-1;<br>			bdata-&gt;last_offset = remaining_size;<br>		}<br>		bdata-&gt;last_offset &amp;= ~PAGE_MASK;<br>	} else {<br>		bdata-&gt;last_pos = start + areasize - 1;/*start包含在内,故减1*/<br>		bdata-&gt;last_offset = size &amp; ~PAGE_MASK; /*<br>		                                         * 上次分配的结束地址在页面last_pos<br>		                                         * 内的偏移<br>		                                         */<br>		ret = phys_to_virt(start * PAGE_SIZE + bdata-&gt;node_boot_start);<br>	}<br>	/*<br>	 * Reserve the area now:<br>	 */<br>	for (i = start; i &lt; start+areasize; i++)<br>		if (test_and_set_bit(i, bdata-&gt;node_bootmem_map))<br>			BUG();<br>	memset(ret, 0, size);<br>	return ret;<br>}<br><br><br>/*<br> * 释放未使用的页面和自己使用的页面到buddy系统<br> */<br>static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat)<br><br><br>    罗列的代码,其中有注释.其余函数.....算了吧,没有注释,用不着了.<br></pre>
    </td>
  </tr>
  </tbody>
</table></body></html>

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -